saa231 committed on
Commit
8cdf492
·
verified ·
1 Parent(s): 19c4411

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -38
app.py CHANGED
@@ -3,63 +3,72 @@ import gradio as gr
3
  import os
4
  from project_model import process_inputs, session
5
 
6
- # --- Handle submission for both initial and follow-up ---
7
- def handle_submit(image, audio):
8
- """
9
- Handles both initial upload and follow-up.
10
- Logic:
11
- - If new image + audio => initial interaction
12
- - If only audio => follow-up
13
- """
14
- if image is not None and audio is not None:
15
- # Initial case: new image uploaded
16
- message, answer_audio = process_inputs(session, image=image, audio_path=audio)
17
 
18
- # Save uploaded image
19
- image_save_path = "uploaded_image.png"
20
- image.save(image_save_path)
21
 
22
- markdown_reply = f"**{message}**\n\n![Context Image](file/{image_save_path})"
23
- return markdown_reply, answer_audio
 
24
 
25
- elif audio is not None:
26
- # Follow-up case: use existing image and new audio
27
- if session.current_image is None:
28
- return "❗ No previous image found. Please upload an image first.", None
 
 
 
29
 
30
- # Pass the session's current image again
31
- message, answer_audio = process_inputs(session, image=session.current_image, audio_path=audio)
 
 
32
 
33
- # Save existing image (again)
34
- image_save_path = "uploaded_image.png"
35
- session.current_image.save(image_save_path)
36
 
37
- markdown_reply = f"**{message}**\n\n![Context Image](file/{image_save_path})"
38
- return markdown_reply, answer_audio
 
39
 
40
- else:
41
- return "❗ Please upload an image and/or record audio.", None
 
 
 
 
 
42
 
43
  # --- Gradio App ---
44
  with gr.Blocks() as demo:
45
- gr.Markdown("## 👁️🎙️ Multimodal Visual Q&A with Audio Response")
46
 
47
  with gr.Row():
48
  with gr.Column():
49
- image_input = gr.Image(label="Upload or Capture Image (only first time)", sources=["upload", "webcam"], type="pil")
50
- audio_input = gr.Audio(label="Speak Your Question", sources=["microphone"], type="filepath")
 
51
 
52
- submit_btn = gr.Button("Submit Question")
 
 
53
 
54
  with gr.Column():
55
- status_output = gr.Markdown(label="Response")
56
- audio_output = gr.Audio(label="🔊 Listen to Response", interactive=False)
57
 
58
- # Connect submit button
59
  submit_btn.click(
60
- fn=handle_submit,
61
  inputs=[image_input, audio_input],
62
- outputs=[status_output, audio_output]
 
 
 
 
 
 
63
  )
64
 
65
  if __name__ == "__main__":
 
3
  import os
4
  from project_model import process_inputs, session
5
 
6
+ # --- Handle Initial Upload ---
7
+ def handle_initial(image, audio):
8
+ if image is None or audio is None:
9
+ return "❗ Please upload both an image and an audio clip."
 
 
 
 
 
 
 
10
 
11
+ message, answer_audio = process_inputs(session, image=image, audio_path=audio)
 
 
12
 
13
+ # Save image locally
14
+ image_save_path = "uploaded_image.png"
15
+ image.save(image_save_path)
16
 
17
+ # Build markdown reply
18
+ markdown_reply = (
19
+ f"**{message}**\n\n"
20
+ f"![Context Image](file/{image_save_path})\n\n"
21
+ f"[🔊 Listen to the Answer](file/{answer_audio})"
22
+ )
23
+ return markdown_reply
24
 
25
+ # --- Handle Follow-up ---
26
+ def handle_followup(followup_audio):
27
+ if followup_audio is None:
28
+ return "❗ Please record a follow-up question."
29
 
30
+ message, answer_audio = process_inputs(session, audio_path=followup_audio)
 
 
31
 
32
+ # Reuse saved image
33
+ image_save_path = "uploaded_image.png"
34
+ session.current_image.save(image_save_path)
35
 
36
+ # Build markdown reply
37
+ markdown_reply = (
38
+ f"**{message}**\n\n"
39
+ f"![Context Image](file/{image_save_path})\n\n"
40
+ f"[🔊 Listen to the Answer](file/{answer_audio})"
41
+ )
42
+ return markdown_reply
43
 
44
  # --- Gradio App ---
45
  with gr.Blocks() as demo:
46
+ gr.Markdown("## 👁️🎙️ Multimodal Visual Q&A with Audio Output")
47
 
48
  with gr.Row():
49
  with gr.Column():
50
+ image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
51
+ audio_input = gr.Audio(label="Initial Question (Voice)", sources=["microphone"], type="filepath")
52
+ submit_btn = gr.Button("Submit Initial Q&A")
53
 
54
+ gr.Markdown("### 🎀 Ask a Follow-up Question")
55
+ followup_audio_input = gr.Audio(label="Follow-up Question", sources=["microphone"], type="filepath")
56
+ followup_btn = gr.Button("Ask Follow-up")
57
 
58
  with gr.Column():
59
+ status_output = gr.Textbox(label="Response", interactive=False, lines=10)
 
60
 
61
+ # Single Textbox Output
62
  submit_btn.click(
63
+ fn=handle_initial,
64
  inputs=[image_input, audio_input],
65
+ outputs=status_output
66
+ )
67
+
68
+ followup_btn.click(
69
+ fn=handle_followup,
70
+ inputs=[followup_audio_input],
71
+ outputs=status_output
72
  )
73
 
74
  if __name__ == "__main__":