Spaces:

allantacuelwvsu
/

multimodal_chatbot

Sleeping

App Files Files Community

allantacuelwvsu committed on Apr 1, 2025

Commit

4344f16

1 Parent(s): 8c2a213

add app.py, requirements.txt

Browse files

Files changed (2) hide show

app.py +164 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import streamlit as st
+import requests
+import os
+import time
+# Load Hugging Face API key
+HF_API_KEY = os.getenv("HF_API_KEY")
+# Define API URLs
+IMG2TEXT_API = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
+CHAT_API = "https://api-inference.huggingface.co/models/facebook/blenderbot-3B"
+HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"}
+# App Title
+st.title("Multimodal Chatbot")
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+    # Add initial bot welcome message
+    initial_message = "Hello! I'm your chatbot. You can upload an image or ask me anything to get started!"
+    st.session_state.messages.append({"role": "assistant", "content": initial_message})
+# Display chat history
+for msg in st.session_state.messages:
+    with st.chat_message(msg["role"]):
+        st.write(msg["content"])
+# Image upload
+uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
+# User input
+user_input = st.chat_input("Ask about this image or anything...")
+image_caption = None
+# Process image if uploaded
+if uploaded_file:
+    # Check image type
+    if uploaded_file.type not in ["image/jpeg", "image/png"]:
+        st.error("⚠️ Please upload a valid JPG or PNG image.")
+    else:
+        # Send image to Hugging Face image-to-text API with retries
+        img_bytes = uploaded_file.read()
+        st.session_state.messages.append({"role": "user", "content": "[Image Uploaded]"})
+        with st.chat_message("user"):
+            st.image(img_bytes, caption="Uploaded Image", use_column_width=True)
+            # st.write(f"**Image to text context generated:** {image_caption}") fix plz
+        max_retries = 3
+        for i in range(max_retries):
+            response = requests.post(
+                IMG2TEXT_API,
+                headers={
+                    "Authorization": f"Bearer {HF_API_KEY}",
+                    "Content-Type": "application/octet-stream",
+                },
+                data=img_bytes  # Send raw image data
+            )
+            if response.status_code == 200:
+                try:
+                    res_json = response.json()
+                    # Check for list format and dictionary format
+                    if isinstance(res_json, list) and len(res_json) > 0:
+                        image_caption = res_json[0].get("generated_text", "⚠️ No caption generated.")
+                    elif isinstance(res_json, dict) and "generated_text" in res_json:
+                        image_caption = res_json["generated_text"]
+                    if image_caption:
+                        st.session_state.image_caption = image_caption
+                        bot_context = (
+                            f"Consider this image: {image_caption}. Please provide a relevant and engaging response to the image."
+                        )
+                        payload = {"inputs": bot_context}
+                        # Send context to chatbot
+                        bot_response = requests.post(CHAT_API, headers=HEADERS, json=payload)
+                        if bot_response.status_code == 200:
+                            res_json = bot_response.json()
+                            # Check if the response is a list or dictionary
+                            if isinstance(res_json, list) and len(res_json) > 0:
+                                bot_reply = res_json[0].get("generated_text", "I received your image. What would you like to ask about it?")
+                            elif isinstance(res_json, dict) and "generated_text" in res_json:
+                                bot_reply = res_json["generated_text"]
+                            else:
+                                bot_reply = "I received your image. What would you like to ask about it?"
+                        else:
+                            bot_reply = "I received your image. What would you like to ask about it?"
+                        # Append chatbot's generated response
+                        st.session_state.messages.append({"role": "assistant", "content": bot_reply})
+                        with st.chat_message("assistant"):
+                            st.write(bot_reply)
+                        uploaded_file = None  # Clear image after processing
+                        break  # Successful, no need to retry
+                    else:
+                        st.error("⚠️ Unexpected response format from image captioning API.")
+                        break
+                except (KeyError, IndexError, TypeError) as e:
+                    st.error(f"⚠️ Error: Unable to generate caption. Details: {e}")
+                    break
+            elif response.status_code == 503:
+                st.warning(f"⏳ Model warming up... Retrying in 5 seconds. Attempt {i+1}/{max_retries}")
+                time.sleep(5)  # Wait before retrying
+            else:
+                st.error(f"⚠️ Image API Error: {response.status_code} - {response.text}")
+                break
+# Process user input if provided
+if user_input:
+    combined_input = user_input
+    # Merge image caption with user query if an image was uploaded
+    if "image_caption" in st.session_state and st.session_state.image_caption:
+        combined_input = f"Image context: {st.session_state.image_caption}. {user_input}"
+    # Append user message
+    st.session_state.messages.append({"role": "user", "content": user_input})
+    with st.chat_message("user"):
+        st.write(user_input)
+    # Send combined input to chatbot with retries
+    payload = {"inputs": combined_input}
+    max_retries = 3
+    for i in range(max_retries):
+        response = requests.post(CHAT_API, headers=HEADERS, json=payload)
+        if response.status_code == 200:
+            try:
+                res_json = response.json()
+                # If it's a dictionary and contains 'generated_text'
+                if isinstance(res_json, dict) and "generated_text" in res_json:
+                    bot_reply = res_json["generated_text"]
+                    break  # Successful, no need to retry
+                # If response is a list (some models return list format)
+                elif isinstance(res_json, list) and len(res_json) > 0 and "generated_text" in res_json[0]:
+                    bot_reply = res_json[0]["generated_text"]
+                    break
+                else:
+                    st.error("⚠️ Unexpected response format from chatbot API.")
+                    bot_reply = "⚠️ Unable to generate a response."
+                    break
+            except (KeyError, TypeError, IndexError):
+                bot_reply = "⚠️ Error: Unable to generate response."
+                break
+        elif response.status_code == 503:
+            st.warning(f"⏳ Model warming up... Retrying in 5 seconds. Attempt {i+1}/{max_retries}")
+            time.sleep(5)  # Wait before retrying
+        else:
+            bot_reply = f"⚠️ Chatbot Error {response.status_code}: {response.text}"
+            break
+    # Append bot response
+    st.session_state.messages.append({"role": "assistant", "content": bot_reply})
+    with st.chat_message("assistant"):
+        st.write(bot_reply)
+# Clear button to reset chat
+if st.button("Clear Chat"):
+    st.session_state.messages = []
+    st.experimental_rerun()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Requests==2.32.3
2	+ streamlit==1.30.0