"""Gradio front end for a multimodal visual Q&A demo with audio output.

The user supplies an image and a recorded voice clip; both are handed to
``project_model.process_inputs`` and the resulting status message, the
image, and the answer audio are shown in the UI.
"""

import gradio as gr

from project_model import process_inputs


def handle_inputs(image, audio):
    """Validate the two inputs and run them through ``process_inputs``.

    Args:
        image: Value of the image component, or ``None`` when nothing was
            provided. The component is created with ``type="pil"``.
        audio: Value of the audio component, or ``None`` when nothing was
            recorded. The component is created with ``type="filepath"``.

    Returns:
        A ``(status, image, answer_audio)`` tuple matching the three output
        components wired to the submit button. When either input is
        missing, ``status`` is a prompt to supply both and the other two
        entries are ``None``. Otherwise ``status`` and ``answer_audio`` are
        whatever ``process_inputs`` returned, and ``image`` is the input
        image passed through unchanged.
    """
    # Guard clause: the model call needs both modalities.
    if image is None or audio is None:
        return "Please upload both an image and an audio clip.", None, None

    message, answer_audio = process_inputs(image, audio)
    # The input image is echoed back so it appears next to the answer.
    return message, image, answer_audio


with gr.Blocks() as demo:
    gr.Markdown("## 👁️🎙️ Multimodal Visual Q&A with Audio Output")

    with gr.Row():
        # Left column: user inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(
                label="Upload or Capture Image",
                sources=["upload", "webcam"],
                type="pil",
            )
            audio_input = gr.Audio(
                label="Record Voice",
                sources=["microphone"],
                type="filepath",
            )
            submit_btn = gr.Button("Submit")

        # Right column: read-only results.
        with gr.Column():
            status_output = gr.Textbox(label="Status", interactive=False)
            image_display = gr.Image(label="Processed Image")
            audio_output = gr.Audio(label="Answer Audio", interactive=False)

    # Output order must match the tuple order returned by handle_inputs.
    submit_btn.click(
        fn=handle_inputs,
        inputs=[image_input, audio_input],
        outputs=[status_output, image_display, audio_output],
    )


if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable link;
    # confirm that exposure is intended before deploying.
    demo.launch(show_error=True, share=True)