saa231 committed on
Commit
334328d
·
verified ·
1 Parent(s): 4eee73a

Update app.py

Browse files

Merged the initial and follow-up record and submit buttons to reduce redundancy

Files changed (1) hide show
  1. app.py +57 -68
app.py CHANGED
@@ -3,64 +3,61 @@ import gradio as gr
3
  import os
4
  from project_model import process_inputs, session
5
 
6
# --- Handle Initial Upload ---
def handle_initial(image, audio):
    """Run the first Q&A turn; needs both an uploaded image and a voice clip.

    Returns a (markdown_text, answer_audio_path) pair for the Gradio outputs.
    """
    if image is None or audio is None:
        return "Please upload both an image and an audio clip.", None

    message, answer_audio = process_inputs(session, image=image, audio_path=audio)

    # Persist the inputs so the markdown below can reference them by path.
    saved_original = "uploaded_image.png"
    saved_annotated = "annotated_image.png"
    image.save(saved_original)
    if session.annotated_image:
        session.annotated_image.save(saved_annotated)

    # Assemble the markdown response (annotated-image / audio embeds are
    # intentionally not rendered inline here).
    reply_sections = [
        f"**{message}**\n\n",
        "**Original Image:**\n\n",
        f"![Original Image](file/{saved_original})\n\n",
        "**Detected Objects:**\n\n",
    ]
    return "".join(reply_sections), answer_audio
36
-
37
-
38
# --- Handle Follow-up ---
def handle_followup(followup_audio):
    """Answer a follow-up voice question using the images already in session.

    Returns a (markdown_text, answer_audio_path) pair for the Gradio outputs.
    """
    if followup_audio is None:
        return "❗ Please record a follow-up question.", None

    message, answer_audio = process_inputs(session, audio_path=followup_audio)

    # Re-save the session images so the markdown links below stay valid.
    img_path = "uploaded_image.png"
    boxes_path = "annotated_image.png"
    if session.current_image:
        session.current_image.save(img_path)
    if session.annotated_image:
        session.annotated_image.save(boxes_path)

    # Markdown reply with both image previews embedded.
    reply = (
        f"**{message}**\n\n"
        "**Original Image:**\n\n"
        f"![Original Image](file/{img_path})\n\n"
        "**Detected Objects:**\n\n"
        f"![Annotated Image](file/{boxes_path})"
    )
    return reply, answer_audio
63
 
 
64
  # --- Gradio App ---
65
  with gr.Blocks() as demo:
66
  gr.Markdown("## Multimodal Visual Q&A with Audio Output")
@@ -69,28 +66,20 @@ with gr.Blocks() as demo:
69
  with gr.Column():
70
  image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
71
  audio_input = gr.Audio(label="Initial Question (Voice)", sources=["microphone"], type="filepath")
72
- submit_btn = gr.Button("Submit Initial Q&A")
73
 
74
- gr.Markdown("### Ask a Follow-up Question")
75
- followup_audio_input = gr.Audio(label="Follow-up Question", sources=["microphone"], type="filepath")
76
- followup_btn = gr.Button("Ask Follow-up")
77
 
78
  with gr.Column():
79
  status_output = gr.Markdown(label="Response") # change from Textbox to Markdown!
80
  audio_output = gr.Audio(label="Audio Answer", interactive=False)
81
 
82
- # Two outputs now
83
  submit_btn.click(
84
- fn=handle_initial,
85
- inputs=[image_input, audio_input],
86
- outputs=[status_output, audio_output]
87
- )
88
-
89
- followup_btn.click(
90
- fn=handle_followup,
91
- inputs=[followup_audio_input],
92
  outputs=[status_output, audio_output]
93
  )
94
 
95
  if __name__ == "__main__":
96
- demo.launch(show_error=True, share=True)
 
3
  import os
4
  from project_model import process_inputs, session
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
# --- Handle Initial and Follow-up ---
def handle_question(image, audio, followup_audio=None):
    """Handle both the initial and the follow-up question with one entry point.

    An initial turn requires `image` and `audio`; a follow-up turn requires
    only `followup_audio` (the session keeps the image from the first turn).

    Args:
        image: PIL image from the Gradio image input, or None.
        audio: filepath of the initial spoken question, or None.
        followup_audio: filepath of a follow-up spoken question, or None.

    Returns:
        (markdown_text, answer_audio_path_or_None) for the two Gradio outputs.
    """
    # Explicit None checks: component values are objects/filepaths, so
    # truthiness (the old `if image and audio`) is not a reliable
    # "was it provided" signal for a PIL image.
    if image is not None and audio is not None:
        # Initial question: process image + voice together.
        # NOTE(review): values left in the initial inputs take precedence
        # over a newly recorded follow-up — confirm this is intended.
        message, answer_audio = process_inputs(session, image=image, audio_path=audio)
        _save_preview_images(image)
    elif followup_audio is not None:
        # Follow-up: audio only; the session already holds the image.
        message, answer_audio = process_inputs(session, audio_path=followup_audio)
        _save_preview_images(session.current_image)
    else:
        # Nothing usable was provided.
        return "Please upload both an image and an audio clip for the initial question or record a follow-up question.", None

    # Image/audio previews are served by dedicated output components, so the
    # markdown reply carries only the message text.
    markdown_reply = f"**{message}**\n\n"
    return markdown_reply, answer_audio


def _save_preview_images(current):
    """Persist the current and annotated images to the fixed preview paths."""
    if current is not None:
        current.save("uploaded_image.png")
    if session.annotated_image:
        session.annotated_image.save("annotated_image.png")
59
 
60
+
61
  # --- Gradio App ---
62
  with gr.Blocks() as demo:
63
  gr.Markdown("## Multimodal Visual Q&A with Audio Output")
 
66
  with gr.Column():
67
  image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
68
  audio_input = gr.Audio(label="Initial Question (Voice)", sources=["microphone"], type="filepath")
69
+ followup_audio_input = gr.Audio(label="Follow-up Question (Voice)", sources=["microphone"], type="filepath")
70
 
71
+ submit_btn = gr.Button("Submit Question")
 
 
72
 
73
  with gr.Column():
74
  status_output = gr.Markdown(label="Response") # change from Textbox to Markdown!
75
  audio_output = gr.Audio(label="Audio Answer", interactive=False)
76
 
77
+ # Single button now for both initial and follow-up inputs
78
  submit_btn.click(
79
+ fn=handle_question,
80
+ inputs=[image_input, audio_input, followup_audio_input],
 
 
 
 
 
 
81
  outputs=[status_output, audio_output]
82
  )
83
 
84
  if __name__ == "__main__":
85
+ demo.launch(show_error=True, share=True)