Spaces:

AamirAli123
/

Multimodal_chatbot

Runtime error

App Files Files Community

AamirAli123 committed on Mar 1, 2024

Commit

a5f38d9

verified ·

1 Parent(s): f2adcbc

Create app.py

Browse files

Files changed (1) hide show

app.py +102 -0

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import PIL.Image
+import gradio as gr
+import base64
+import os
+import google.generativeai as genai
+from dotenv import load_dotenv
+load_dotenv()
+# Set Google API key
+GOOGLe_API_KEY = os.getenv("GOOGLE_API_KEY")
+genai.configure(api_key = GOOGLe_API_KEY)
+# Create the Model
+txt_model = genai.GenerativeModel('gemini-pro')
+vis_model = genai.GenerativeModel('gemini-pro-vision')
+# Image to Base 64 Converter
+def image_to_base64(image_path):
+    with open(image_path, 'rb') as img:
+        encoded_string = base64.b64encode(img.read())
+    return encoded_string.decode('utf-8')
+# Function that takes User Inputs and displays it on ChatUI
+def query_message(history,txt,img):
+    if not img:
+        history += [(txt,None)]
+        return history
+    base64 = image_to_base64(img)
+    data_url = f"data:image/jpeg;base64,{base64}"
+    history += [(f"{txt} ![]({data_url})", None)]
+    return history
+# Function that takes User Inputs, generates Response and displays on Chat UI
+def llm_response(history,text,img):
+    if not img:
+        response = txt_model.generate_content(text)
+        history += [(None,response.text)]
+        return history, gr.update(value = "")
+    else:
+        img = PIL.Image.open(img)
+        response = vis_model.generate_content([text,img])
+        history += [(None,response.text)]
+        return history, gr.update(value = "")
+def image_to_base64(image_path):
+    """
+    Reads an image file and returns its base64 encoded representation.
+    Args:
+        image_path (str): The path to the image file.
+    Returns:
+        str: The base64 encoded representation of the image data.
+    """
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+# Encode the logo image into base64
+logo_base64 = image_to_base64("pixelpk_logo.png")
+markdown_content = f"""
+<img src="data:image/png;base64,{logo_base64}" alt="Feedback Logo" style="width: 100px; height: 100px; margin-top: 10px;" />
+<h1>MultiModal Chatbot</h1>
+<p style="margin-top: 5px;">Multimodal chatbot is designed to chat with text and images.</p>
+"""
+css = """
+h1 {
+    text-align: center;
+    display:block;
+}
+"""
+# Interface Code
+with gr.Blocks(gr.themes.Monochrome(), css = css) as app:
+    # Display introductory markdown content
+    gr.Markdown(f"<center>{markdown_content}</center>")
+    with gr.Row():
+        image_box = gr.Image(type = "filepath")
+        chatbot = gr.Chatbot(scale = 3)
+    text_box = gr.Textbox(
+            placeholder="Enter text and press enter, or upload an image",
+            container=False,
+        )
+    btn = gr.Button("Submit")
+    clicked = btn.click(query_message,
+                        [chatbot,text_box,image_box],
+                        [chatbot]
+                        ).then(llm_response,
+                                [chatbot,text_box,image_box],
+                                [chatbot, text_box]
+                                )
+    clicked = text_box.submit(query_message,
+                        [chatbot,text_box,image_box],
+                        [chatbot]
+                        ).then(llm_response,
+                                [chatbot,text_box,image_box],
+                                [chatbot, text_box]
+                                )
+app.queue()
+app.launch(share = True, debug = True)