# app.py (author: saa231)
# Commit 1ebfcf0 (verified): "Update app.py", 2.34 kB
from PIL import Image
import gradio as gr
import os
import base64
from io import BytesIO
from project_model import process_inputs, session
# --- Helpers ---
def image_to_base64(img: Image.Image):
    """Serialize ``img`` as PNG and return it as a base64 ``data:`` URI.

    The image is written to an in-memory buffer (nothing touches disk) and the
    resulting bytes are base64-encoded into a ``data:image/png;base64,...``
    string.
    """
    with BytesIO() as png_buffer:
        img.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue()).decode()
    return "data:image/png;base64," + encoded
def audio_to_base64(audio_path):
    """Read the file at ``audio_path`` and return it as a base64 ``data:`` URI.

    The file is read in binary mode in one go; the MIME type in the returned
    string is always ``audio/wav``, regardless of the file's extension.
    """
    with open(audio_path, "rb") as audio_file:
        encoded = base64.b64encode(audio_file.read())
    return "data:audio/wav;base64," + encoded.decode()
# --- Unified handler ---
def handle_interaction(image, audio, history):
    """Run one voice-question turn and return the updated chat history.

    A turn that supplies an image is treated as an initial question about that
    image; a turn without one is a follow-up that relies on the image already
    held in ``session.current_image``.

    Args:
        image: Image for this turn (passed to ``process_inputs`` and rendered
            via ``image_to_base64``), or ``None`` for a follow-up.
        audio: File path of the recorded question, or ``None`` when nothing
            was recorded.
        history: Existing list of ``(user_message, assistant_message)`` pairs.
            ``None`` or an empty value is treated as an empty history.

    Returns:
        A new list consisting of ``history`` plus one pair for this turn. The
        list passed in is not mutated.
    """
    # Copy so the caller's list is never mutated, and tolerate a missing
    # history (e.g. the very first call before any chat state exists).
    history = list(history) if history else []

    if session.current_image is None and (image is None or audio is None):
        return history + [("User", "Please upload an image and record an initial question.")]

    # A follow-up still needs a recorded question; without this guard a
    # missing recording would be forwarded as audio_path=None.
    if audio is None:
        return history + [("User", "Please record a question.")]

    # Explicit None check: "was an image supplied", not "is the object truthy".
    if image is not None:
        # An image was supplied, so this is an initial question.
        img_base64 = image_to_base64(image)
        user_message = f"Uploaded Image:\n\n![]({img_base64})\n\n Asked Question (Voice Input)"
        reply_text, reply_audio_path = process_inputs(session, image=image, audio_path=audio)
    else:
        # Microphone emoji written as an escape so it survives re-encoding.
        user_message = "\U0001F3A4 Follow-up Question (Voice Input)"
        reply_text, reply_audio_path = process_inputs(session, audio_path=audio)

    # Prepare the assistant reply: answer text, the session's current image
    # (read after process_inputs has run), and an inline audio player.
    img_base64_current = image_to_base64(session.current_image)
    audio_base64 = audio_to_base64(reply_audio_path)
    assistant_message = (
        f"**{reply_text}**\n\n![]({img_base64_current})\n\n"
        f"<audio controls><source src='{audio_base64}' type='audio/wav'></audio>"
    )
    return history + [(user_message, assistant_message)]
# --- Gradio App ---
# One-button UI. handle_interaction takes (image, audio, history) and returns
# the full updated history, so it is wired directly to a Chatbot through a
# button click. gr.ChatInterface is not used here because it invokes its
# function as fn(message, history, *additional_inputs) and expects only the
# reply back, which does not match handle_interaction's signature or return.
with gr.Blocks() as demo:
    # Eye + studio-microphone emoji written as escapes so they survive
    # re-encoding of the source file.
    gr.Markdown(
        "# \U0001F441\uFE0F\U0001F399\uFE0F Multimodal Visual Q&A with Audio Output (One-Button Version)"
    )

    chatbot = gr.Chatbot(show_label=False)

    image_input = gr.Image(
        label="Upload or Capture Image (only needed initially)",
        sources=["upload", "webcam"],
        type="pil",
    )
    audio_input = gr.Audio(
        label="Question (Voice)",
        sources=["microphone"],
        type="filepath",
    )

    with gr.Row():
        submit_button = gr.Button("Ask / Submit")
        clear_button = gr.Button("Clear")

    # Input order matches handle_interaction(image, audio, history); the
    # returned history replaces the Chatbot's contents.
    submit_button.click(
        fn=handle_interaction,
        inputs=[image_input, audio_input, chatbot],
        outputs=chatbot,
    )
    # Clearing only empties the visible transcript.
    clear_button.click(fn=lambda: [], inputs=None, outputs=chatbot)

if __name__ == "__main__":
    demo.launch(show_error=True, share=True)