Update app.py
Browse files
merged audio inputs
app.py
CHANGED
|
@@ -4,10 +4,8 @@ import os
|
|
| 4 |
from project_model import process_inputs, session
|
| 5 |
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# Initial check for image and audio for first question
|
| 10 |
-
if image and audio:
|
| 11 |
message, answer_audio = process_inputs(session, image=image, audio_path=audio)
|
| 12 |
|
| 13 |
# Save images
|
|
@@ -31,11 +29,10 @@ def handle_question(image, audio, followup_audio=None):
|
|
| 31 |
#f"Your browser does not support the audio element.\n"
|
| 32 |
#f"</audio>"
|
| 33 |
)
|
| 34 |
-
# Handle follow-up
|
| 35 |
-
|
| 36 |
-
message, answer_audio = process_inputs(session, audio_path=followup_audio)
|
| 37 |
|
| 38 |
-
# Save
|
| 39 |
original_path = "uploaded_image.png"
|
| 40 |
annotated_path = "annotated_image.png"
|
| 41 |
|
|
@@ -53,7 +50,7 @@ def handle_question(image, audio, followup_audio=None):
|
|
| 53 |
#f""
|
| 54 |
)
|
| 55 |
else:
|
| 56 |
-
return "Please upload
|
| 57 |
|
| 58 |
return markdown_reply, answer_audio
|
| 59 |
|
|
@@ -65,8 +62,7 @@ with gr.Blocks() as demo:
|
|
| 65 |
with gr.Row():
|
| 66 |
with gr.Column():
|
| 67 |
image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
|
| 68 |
-
audio_input = gr.Audio(label="
|
| 69 |
-
followup_audio_input = gr.Audio(label="Follow-up Question (Voice)", sources=["microphone"], type="filepath")
|
| 70 |
|
| 71 |
submit_btn = gr.Button("Submit Question")
|
| 72 |
|
|
@@ -77,7 +73,7 @@ with gr.Blocks() as demo:
|
|
| 77 |
# Single button now for both initial and follow-up inputs
|
| 78 |
submit_btn.click(
|
| 79 |
fn=handle_question,
|
| 80 |
-
inputs=[image_input, audio_input
|
| 81 |
outputs=[status_output, audio_output]
|
| 82 |
)
|
| 83 |
|
|
|
|
| 4 |
from project_model import process_inputs, session
|
| 5 |
|
| 6 |
|
| 7 |
+
def handle_question(image, audio):
|
| 8 |
+
if image and audio: # Handle the initial question with image and audio
|
|
|
|
|
|
|
| 9 |
message, answer_audio = process_inputs(session, image=image, audio_path=audio)
|
| 10 |
|
| 11 |
# Save images
|
|
|
|
| 29 |
#f"Your browser does not support the audio element.\n"
|
| 30 |
#f"</audio>"
|
| 31 |
)
|
| 32 |
+
elif audio: # Handle follow-up with just audio
|
| 33 |
+
message, answer_audio = process_inputs(session, audio_path=audio)
|
|
|
|
| 34 |
|
| 35 |
+
# Save the current image if it exists
|
| 36 |
original_path = "uploaded_image.png"
|
| 37 |
annotated_path = "annotated_image.png"
|
| 38 |
|
|
|
|
| 50 |
#f""
|
| 51 |
)
|
| 52 |
else:
|
| 53 |
+
return "Please upload an image and/or record an audio clip.", None
|
| 54 |
|
| 55 |
return markdown_reply, answer_audio
|
| 56 |
|
|
|
|
| 62 |
with gr.Row():
|
| 63 |
with gr.Column():
|
| 64 |
image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
|
| 65 |
+
audio_input = gr.Audio(label="Ask a Question (Voice)", sources=["microphone"], type="filepath")
|
|
|
|
| 66 |
|
| 67 |
submit_btn = gr.Button("Submit Question")
|
| 68 |
|
|
|
|
| 73 |
# Single button now for both initial and follow-up inputs
|
| 74 |
submit_btn.click(
|
| 75 |
fn=handle_question,
|
| 76 |
+
inputs=[image_input, audio_input],
|
| 77 |
outputs=[status_output, audio_output]
|
| 78 |
)
|
| 79 |
|