| from PIL import Image |
| import gradio as gr |
| import os |
| import base64 |
| from io import BytesIO |
| from project_model import process_inputs, session |
|
|
| |
def image_to_base64(img: Image.Image):
    """Encode an image as a base64 PNG data URI.

    Args:
        img: Image object whose ``save`` method is used to serialise it as
            PNG into an in-memory buffer.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    # Serialise to PNG entirely in memory; nothing is written to disk.
    with BytesIO() as png_buffer:
        img.save(png_buffer, format="PNG")
        payload = base64.b64encode(png_buffer.getvalue())
    return "data:image/png;base64," + payload.decode()
|
|
def audio_to_base64(audio_path):
    """Read an audio file and return it as a base64 WAV data URI.

    Args:
        audio_path: Path of the audio file to read in binary mode.

    Returns:
        A string of the form ``data:audio/wav;base64,<payload>``. The MIME
        type is always ``audio/wav``; the file's contents are not inspected.
    """
    with open(audio_path, "rb") as wav_file:
        encoded = base64.b64encode(wav_file.read())
    return "data:audio/wav;base64," + encoded.decode()
|
|
| |
def handle_interaction(image, audio, history):
    """Run one voice question/answer turn and append it to the chat history.

    Args:
        image: Newly uploaded image, or ``None`` for a follow-up question
            about the image already held in ``session.current_image``.
        audio: File path of the recorded question, or ``None`` if nothing
            was recorded.
        history: List of previous chat entries. ``None`` is treated as an
            empty history.

    Returns:
        A new list consisting of ``history`` plus one
        ``(user_message, assistant_message)`` tuple. When required input is
        missing, the appended tuple carries a prompt asking for it and
        ``process_inputs`` is not called.
    """
    history = [] if history is None else history

    # The first turn needs both an image and a recorded question.
    if session.current_image is None and (image is None or audio is None):
        return history + [("User", "Please upload an image and record an initial question.")]

    # Follow-up turns still need a recording; without this guard a missing
    # recording would be forwarded to process_inputs as audio_path=None.
    if audio is None:
        return history + [("User", "Please record a question.")]

    if image is not None:
        # A new image starts a fresh question: show it inline in the user turn.
        img_base64 = image_to_base64(image)
        user_message = (
            "Uploaded Image:\n\n"
            f"<img src='{img_base64}' alt='Uploaded image'>\n\n"
            " Asked Question (Voice Input)"
        )
        reply_text, reply_audio_path = process_inputs(session, image=image, audio_path=audio)
    else:
        user_message = "🎤 Follow-up Question (Voice Input)"
        reply_text, reply_audio_path = process_inputs(session, audio_path=audio)

    # Embed the image the answer refers to and the spoken reply as data URIs.
    # NOTE(review): presumably process_inputs stores the uploaded image on
    # session.current_image -- confirm against project_model.
    img_base64_current = image_to_base64(session.current_image)
    audio_base64 = audio_to_base64(reply_audio_path)

    assistant_message = (
        f"**{reply_text}**\n\n"
        f"<img src='{img_base64_current}' alt='Current image'>\n\n"
        f"<audio controls><source src='{audio_base64}' type='audio/wav'></audio>"
    )

    return history + [(user_message, assistant_message)]
|
|
| |
def _chat_fn(message, history, image, audio):
    """Bridge gr.ChatInterface's calling convention to handle_interaction.

    ChatInterface invokes its callback as
    ``fn(message, history, *additional_inputs)`` and expects the assistant's
    reply back, whereas ``handle_interaction`` takes
    ``(image, audio, history)`` and returns the whole updated history. The
    typed ``message`` is ignored because the question is supplied by voice.
    """
    updated_history = handle_interaction(image, audio, history)
    # The last entry is the (user_message, assistant_message) pair just added.
    return updated_history[-1][1]


with gr.Blocks() as demo:
    # NOTE(review): the emoji prefix of this title appears mis-encoded;
    # confirm the intended characters.
    gr.Markdown("# ๐๏ธ๐๏ธ Multimodal Visual Q&A with Audio Output (One-Button Version)")

    # NOTE(review): clear_btn is a Gradio 4.x ChatInterface argument; confirm
    # that the installed Gradio version still accepts it.
    chatbot = gr.ChatInterface(
        fn=_chat_fn,
        additional_inputs=[
            gr.Image(
                label="Upload or Capture Image (only needed initially)",
                sources=["upload", "webcam"],
                type="pil",
            ),
            gr.Audio(label="Question (Voice)", sources=["microphone"], type="filepath"),
        ],
        submit_btn="Ask / Submit",
        clear_btn="Clear",
        chatbot=gr.Chatbot(show_label=False),
    )


if __name__ == "__main__":
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    demo.launch(show_error=True, share=True)
|
|