AseemD committed on
Commit
25f5525
·
verified ·
1 Parent(s): 4e9c666

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chroma_langchain_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
app_utils.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import re
3
+ import uuid
4
+ import base64
5
+ import chromadb
6
+ import gradio as gr
7
+ import numpy as np
8
+ from PIL import Image
9
+ from io import BytesIO
10
+ from operator import itemgetter
11
+ from IPython.display import HTML, display
12
+
13
+ from langchain.vectorstores import Chroma
14
+ from langchain.storage import InMemoryStore
15
+ from langchain.schema.document import Document
16
+ from langchain.embeddings import OpenAIEmbeddings
17
+ from langchain.retrievers.multi_vector import MultiVectorRetriever
18
+
19
+ from langchain_openai import ChatOpenAI
20
+ from langchain_core.messages import HumanMessage
21
+ from langchain_core.output_parsers import StrOutputParser
22
+ from langchain_core.runnables import RunnableLambda, RunnablePassthrough
23
+
# Load the persisted Chroma vector store (embeddings computed via OpenAI)
# and expose a retriever over it for the RAG chain below.
vectorstore = Chroma(collection_name="multi_modal_rag",
                     embedding_function=OpenAIEmbeddings(),
                     persist_directory="chroma_langchain_db")

# Metadata key that would link summary vectors to parent documents.
id_key = "doc_id"
store = InMemoryStore()
# BUG FIX: the original constructed a MultiVectorRetriever(vectorstore, store,
# id_key) here and then immediately rebound `retriever` to
# vectorstore.as_retriever(), so the MultiVectorRetriever was dead code and its
# InMemoryStore docstore was never populated or consulted. The effective
# behavior — a plain vector-store retriever — is kept; the discarded object is
# no longer built.
# NOTE(review): if multi-vector retrieval was actually intended, the docstore
# must be filled with parent documents and the MultiVectorRetriever used —
# confirm with the author.
retriever = vectorstore.as_retriever()
37
+
def plt_img_base64(img_base64):
    """Render a base64-encoded image inline via IPython's HTML display."""
    # Embed the payload as a data-URI <img> tag; the browser decodes it.
    tag = '<img src="data:image/jpeg;base64,' + img_base64 + '" />'
    display(HTML(tag))
44
+
45
+
def looks_like_base64(sb):
    """Heuristically decide whether *sb* is plausibly base64-encoded data."""
    # Base64 alphabet (A-Z, a-z, 0-9, '+', '/') followed by at most two '='
    # padding characters; '^'/'$' anchor the whole string.
    matched = re.match("^[A-Za-z0-9+/]+[=]{0,2}$", sb)
    return matched is not None
49
+
50
+
def is_image_data(b64data):
    """Return True if *b64data* decodes to bytes starting with a known image magic.

    Recognized formats: JPEG, PNG, GIF and WebP. Any failure to decode the
    base64 input (invalid characters, bad padding, non-string input) is
    treated as "not an image" rather than raised.
    """
    # Prefix signatures for formats whose magic sits at offset 0.
    image_signatures = (
        b"\xff\xd8\xff",                       # jpg
        b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a",   # png
        b"\x47\x49\x46\x38",                   # gif
    )
    try:
        # 12 bytes cover every signature above, including WebP's
        # RIFF<size>WEBP container layout.
        header = base64.b64decode(b64data)[:12]
    except Exception:
        return False
    # BUG FIX: the original matched on b"RIFF" alone, which also matches WAV
    # and AVI files; WebP requires the "WEBP" FourCC at offset 8 inside the
    # RIFF container.
    if header[:4] == b"\x52\x49\x46\x46" and header[8:12] == b"\x57\x45\x42\x50":
        return True
    return any(header.startswith(sig) for sig in image_signatures)
69
+
70
+
def resize_base64_image(base64_string, size=(128, 128)):
    """Resize a base64-encoded image and return it re-encoded as base64.

    Parameters
    ----------
    base64_string : str
        Base64-encoded image bytes (any format PIL can open).
    size : tuple
        Target (width, height) in pixels.

    Returns
    -------
    str
        The resized image, re-encoded in the source format, as base64 text.
    """
    # Decode the Base64 string and load it as a PIL image.
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))

    # LANCZOS resampling: best quality for downscaling.
    resized_img = img.resize(size, Image.LANCZOS)

    # Save the resized image to an in-memory buffer.
    buffered = io.BytesIO()
    # BUG FIX: img.format can be None (e.g. images not read from an encoded
    # stream), which makes save() raise. Fall back to JPEG in that case;
    # images decoded from real file bytes keep their original format, so the
    # common path is unchanged.
    resized_img.save(buffered, format=img.format or "JPEG")

    # Re-encode the resized bytes back to Base64 text.
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
88
+
89
+
def split_image_text_types(docs):
    """Partition retrieved docs into base64 images and plain texts.

    Returns a dict: {"images": [base64 strings, resized], "texts": [str]}.
    """
    images = []
    plain_texts = []
    for item in docs:
        # Unwrap LangChain Document objects to their raw page content.
        content = item.page_content if isinstance(item, Document) else item
        if looks_like_base64(content) and is_image_data(content):
            # Shrink images so they fit comfortably in the LLM prompt.
            images.append(resize_base64_image(content, size=(1300, 600)))
        else:
            plain_texts.append(content)
    return {"images": images, "texts": plain_texts}
106
+
107
+
def img_prompt_func(data_dict):
    """Build a single multimodal HumanMessage from retrieved context.

    Expects data_dict["context"] = {"images": [...], "texts": [...]} and
    data_dict["question"] = the user's query string.
    """
    context = data_dict["context"]
    formatted_texts = "\n".join(context["texts"])

    # One image_url part per retrieved image, encoded as a data URI.
    messages = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image}"},
        }
        for image in context["images"]
    ]

    # NOTE(review): there is no separator between the instruction sentence
    # and "User-provided question:" — the adjacent literals concatenate
    # directly. Preserved byte-for-byte here; confirm whether a newline was
    # intended.
    prompt_text = (
        "Answer the question based on the following context, which can include text, tables, and images."
        f"User-provided question: {data_dict['question']}\n\n"
        "Text and / or tables:\n"
        f"{formatted_texts}"
    )
    messages.append({"type": "text", "text": prompt_text})
    return [HumanMessage(content=messages)]
136
+
137
+
def multi_modal_rag_chain(retriever):
    """Assemble the multimodal RAG chain: retrieve -> prompt -> LLM -> text.

    The returned runnable takes a question string and produces the model's
    answer as a plain string (streaming enabled on the model).
    """
    # Multimodal-capable chat model; temperature 0 for deterministic output.
    llm = ChatOpenAI(temperature=0,
                     model="gpt-4o-mini",
                     max_tokens=1024,
                     streaming=True)

    # Fan the incoming question into retrieved context (split into images
    # and texts) alongside the question itself.
    inputs = {
        "context": retriever | RunnableLambda(split_image_text_types),
        "question": RunnablePassthrough(),
    }

    # Format both into a multimodal message, run the model, parse to str.
    return inputs | RunnableLambda(img_prompt_func) | llm | StrOutputParser()
chroma_langchain_db/0029ac48-b5c5-4755-9b42-a61f8e18b33c/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18abd8c514282db82706e52b0a33ed659cd534e925a6f149deb7af9ce34bd8e
3
+ size 6284000
chroma_langchain_db/0029ac48-b5c5-4755-9b42-a61f8e18b33c/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effaa959ce2b30070fdafc2fe82096fc46e4ee7561b75920dd3ce43d09679b21
3
+ size 100
chroma_langchain_db/0029ac48-b5c5-4755-9b42-a61f8e18b33c/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6177c4c9be35ad9060ac687da99280169339d5e56adc6a71f735308995b9f0bf
3
+ size 4000
chroma_langchain_db/0029ac48-b5c5-4755-9b42-a61f8e18b33c/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
chroma_langchain_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53475ae3bf140921f7a46a979f5411b5a2d537ebf2e564c5f61c015e3b45f92e
3
+ size 2895872