| import gradio as gr | |
| from project_model import process_inputs | |
def handle_inputs(image, audio):
    """Validate both inputs, then run the model pipeline.

    Returns a ``(status_message, image, answer_audio)`` triple. When either
    input is missing, a prompt message is returned and the image/audio
    output slots are left as ``None``.
    """
    inputs_missing = image is None or audio is None
    if inputs_missing:
        return "Please upload both an image and an audio clip.", None, None

    # Delegate the actual visual-QA + TTS work to the project model; echo
    # the original image back alongside the status and spoken answer.
    status, spoken_answer = process_inputs(image, audio)
    return status, image, spoken_answer
# Assemble the Gradio UI: input widgets (image + voice recording) in the
# left column, results (status text, echoed image, spoken answer) in the
# right column. `demo` is launched from the __main__ guard below.
with gr.Blocks() as demo:
    gr.Markdown("## 👁️🎙️ Multimodal Visual Q&A with Audio Output")

    with gr.Row():
        with gr.Column():
            img_in = gr.Image(
                label="Upload or Capture Image",
                sources=["upload", "webcam"],
                type="pil",
            )
            voice_in = gr.Audio(
                label="Record Voice",
                sources=["microphone"],
                type="filepath",
            )
            run_btn = gr.Button("Submit")
        with gr.Column():
            status_box = gr.Textbox(label="Status", interactive=False)
            img_out = gr.Image(label="Processed Image")
            answer_out = gr.Audio(label="Answer Audio", interactive=False)

    # Wire the button to the handler: two inputs in, three outputs back.
    run_btn.click(
        fn=handle_inputs,
        inputs=[img_in, voice_in],
        outputs=[status_box, img_out, answer_out],
    )
if __name__ == "__main__":
    # share=True publishes a temporary public Gradio URL; show_error=True
    # surfaces handler exceptions in the browser instead of failing silently.
    demo.launch(share=True, show_error=True)