saa231 committed on
Commit
70f14f4
·
verified ·
1 Parent(s): 9ad4c7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -20
app.py CHANGED
@@ -6,44 +6,58 @@ from project_model import process_inputs, session
6
  # --- Handle Initial Upload ---
7
  def handle_initial(image, audio):
8
  if image is None or audio is None:
9
- return "❗ Please upload both an image and an audio clip."
10
 
11
  message, answer_audio = process_inputs(session, image=image, audio_path=audio)
12
 
13
- # Save image locally
14
- image_save_path = "uploaded_image.png"
15
- image.save(image_save_path)
 
 
 
 
16
 
17
  # Build markdown reply
18
  markdown_reply = (
19
  f"**{message}**\n\n"
20
- f"![Context Image](file/{image_save_path})\n\n"
21
- f"[🔊 Listen to the Answer](file/{answer_audio})"
 
 
22
  )
23
- return markdown_reply
 
24
 
25
  # --- Handle Follow-up ---
26
  def handle_followup(followup_audio):
27
  if followup_audio is None:
28
- return "❗ Please record a follow-up question."
29
 
30
  message, answer_audio = process_inputs(session, audio_path=followup_audio)
31
 
32
- # Reuse saved image
33
- image_save_path = "uploaded_image.png"
34
- session.current_image.save(image_save_path)
 
 
 
 
 
35
 
36
  # Build markdown reply
37
  markdown_reply = (
38
  f"**{message}**\n\n"
39
- f"![Context Image](file/{image_save_path})\n\n"
40
- f"[🔊 Listen to the Answer](file/{answer_audio})"
 
 
41
  )
42
- return markdown_reply
43
 
44
  # --- Gradio App ---
45
  with gr.Blocks() as demo:
46
- gr.Markdown("## 👁️🎙️ Multimodal Visual Q&A with Audio Output")
47
 
48
  with gr.Row():
49
  with gr.Column():
@@ -51,24 +65,25 @@ with gr.Blocks() as demo:
51
  audio_input = gr.Audio(label="Initial Question (Voice)", sources=["microphone"], type="filepath")
52
  submit_btn = gr.Button("Submit Initial Q&A")
53
 
54
- gr.Markdown("### 🎤 Ask a Follow-up Question")
55
  followup_audio_input = gr.Audio(label="Follow-up Question", sources=["microphone"], type="filepath")
56
  followup_btn = gr.Button("Ask Follow-up")
57
 
58
  with gr.Column():
59
- status_output = gr.Textbox(label="Response", interactive=False, lines=10)
 
60
 
61
- # Single Textbox Output
62
  submit_btn.click(
63
  fn=handle_initial,
64
  inputs=[image_input, audio_input],
65
- outputs=status_output
66
  )
67
 
68
  followup_btn.click(
69
  fn=handle_followup,
70
  inputs=[followup_audio_input],
71
- outputs=status_output
72
  )
73
 
74
  if __name__ == "__main__":
 
6
  # --- Handle Initial Upload ---
7
  def handle_initial(image, audio):
8
  if image is None or audio is None:
9
+ return "Please upload both an image and an audio clip.", None
10
 
11
  message, answer_audio = process_inputs(session, image=image, audio_path=audio)
12
 
13
+ # Save images
14
+ original_path = "uploaded_image.png"
15
+ annotated_path = "annotated_image.png"
16
+
17
+ image.save(original_path)
18
+ if session.annotated_image:
19
+ session.annotated_image.save(annotated_path)
20
 
21
  # Build markdown reply
22
  markdown_reply = (
23
  f"**{message}**\n\n"
24
+ f"**Original Image:**\n\n"
25
+ f"![Original Image](file/{original_path})\n\n"
26
+ f"**Detected Objects:**\n\n"
27
+ f"![Annotated Image](file/{annotated_path})"
28
  )
29
+ return markdown_reply, answer_audio
30
+
31
 
32
  # --- Handle Follow-up ---
33
  def handle_followup(followup_audio):
34
  if followup_audio is None:
35
+ return "❗ Please record a follow-up question.", None
36
 
37
  message, answer_audio = process_inputs(session, audio_path=followup_audio)
38
 
39
+ # Save original and annotated images again
40
+ original_path = "uploaded_image.png"
41
+ annotated_path = "annotated_image.png"
42
+
43
+ if session.current_image:
44
+ session.current_image.save(original_path)
45
+ if session.annotated_image:
46
+ session.annotated_image.save(annotated_path)
47
 
48
  # Build markdown reply
49
  markdown_reply = (
50
  f"**{message}**\n\n"
51
+ f"**Original Image:**\n\n"
52
+ f"![Original Image](file/{original_path})\n\n"
53
+ f"**Detected Objects:**\n\n"
54
+ f"![Annotated Image](file/{annotated_path})"
55
  )
56
+ return markdown_reply, answer_audio
57
 
58
  # --- Gradio App ---
59
  with gr.Blocks() as demo:
60
+ gr.Markdown("##Multimodal Visual Q&A with Audio Output")
61
 
62
  with gr.Row():
63
  with gr.Column():
 
65
  audio_input = gr.Audio(label="Initial Question (Voice)", sources=["microphone"], type="filepath")
66
  submit_btn = gr.Button("Submit Initial Q&A")
67
 
68
+ gr.Markdown("### Ask a Follow-up Question")
69
  followup_audio_input = gr.Audio(label="Follow-up Question", sources=["microphone"], type="filepath")
70
  followup_btn = gr.Button("Ask Follow-up")
71
 
72
  with gr.Column():
73
+ status_output = gr.Markdown(label="Response") # change from Textbox to Markdown!
74
+ audio_output = gr.Audio(label="Audio Answer", interactive=False)
75
 
76
+ # Two outputs now
77
  submit_btn.click(
78
  fn=handle_initial,
79
  inputs=[image_input, audio_input],
80
+ outputs=[status_output, audio_output]
81
  )
82
 
83
  followup_btn.click(
84
  fn=handle_followup,
85
  inputs=[followup_audio_input],
86
+ outputs=[status_output, audio_output]
87
  )
88
 
89
  if __name__ == "__main__":