saa231 committed on
Commit
109c765
·
verified ·
1 Parent(s): ca82b95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -48
app.py CHANGED
@@ -1,62 +1,48 @@
1
  from PIL import Image
2
  import gradio as gr
3
- import os
4
  import uuid
5
  from project_model import process_inputs, session
6
 
7
 
8
  def handle_question(image, audio):
9
- #try:
10
- if image and audio: # Handle the initial question with image and audio
 
 
 
 
 
11
  session.current_image = image
12
- message, answer_audio = process_inputs(session, image=image, audio_path=audio)
13
-
14
- # Save images
 
 
 
 
 
 
 
 
15
  unique_id = uuid.uuid4().hex
16
  original_path = f"uploaded_image_{unique_id}.png"
17
  annotated_path = f"annotated_image_{unique_id}.png"
18
-
19
  image.save(original_path)
 
20
  if session.annotated_image:
21
  session.annotated_image.save(annotated_path)
22
-
23
- # Build markdown reply for initial question
24
- markdown_reply = (
25
- f"**{message}**\n\n"
26
- #f"**Original Image:**\n\n"
27
- #f"![Original Image](file/{original_path})\n\n"
28
- #f"**Detected Objects:**\n\n"
29
- #f"![Annotated Image](file/{annotated_path})\n\n"
30
- #f"**🔊 Audio Response:**\n\n"
31
- #f"<audio controls autoplay>\n"
32
- #f" <source src='file/{answer_audio}' type='audio/wav'>\n"
33
- #f"Your browser does not support the audio element.\n"
34
- #f"</audio>"
35
- )
36
- elif audio: # Handle follow-up with just audio
37
-
38
- if not session.current_image:
39
- return "No initial image found for the follow-up question.", None
40
-
41
- message, answer_audio = process_inputs(session, image=session.current_image, audio_path=audio)
42
-
43
-
44
-
45
- # Build markdown reply for follow-up question
46
- markdown_reply = (
47
- f"**{message}**\n\n"
48
- #f"**Original Image:**\n\n"
49
- #f"![Original Image](file/{original_path})\n\n"
50
- #f"**Detected Objects:**\n\n"
51
- #f"![Annotated Image](file/{annotated_path})"
52
- )
53
- else:
54
- return "Please upload an image and/or record an audio clip.", None
55
-
56
  return markdown_reply, answer_audio
57
- #except ValueError as e:
58
- # return f"❗Error: {str(e)}", None
59
-
60
 
61
 
62
  # --- Gradio App ---
@@ -67,14 +53,13 @@ with gr.Blocks() as demo:
67
  with gr.Column():
68
  image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
69
  audio_input = gr.Audio(label="Ask a Question (Voice)", sources=["microphone"], type="filepath")
70
-
71
  submit_btn = gr.Button("Submit Question")
72
 
73
  with gr.Column():
74
- status_output = gr.Markdown(label="Response") # change from Textbox to Markdown!
75
  audio_output = gr.Audio(label="Audio Answer", interactive=False)
76
 
77
- # Single button now for both initial and follow-up inputs
78
  submit_btn.click(
79
  fn=handle_question,
80
  inputs=[image_input, audio_input],
@@ -82,4 +67,4 @@ with gr.Blocks() as demo:
82
  )
83
 
84
  if __name__ == "__main__":
85
- demo.launch(show_error=True, share=True)
 
1
  from PIL import Image
2
  import gradio as gr
 
3
  import uuid
4
  from project_model import process_inputs, session
5
 
6
 
7
  def handle_question(image, audio):
8
+ try:
9
+ # No input provided
10
+ if not image and not audio:
11
+ return "Please upload an image and/or record an audio clip.", None
12
+
13
+ # New question with both image + audio
14
+ if image is not None:
15
  session.current_image = image
16
+ session.messages = []
17
+ session.images = []
18
+
19
+ if session.current_image is None:
20
+ return "No initial image found. Please upload an image first.", None
21
+
22
+ # Process inputs
23
+ message, answer_audio = process_inputs(session, image=session.current_image, audio_path=audio)
24
+
25
+ # Save images (only if a new image was uploaded)
26
+ if image:
27
  unique_id = uuid.uuid4().hex
28
  original_path = f"uploaded_image_{unique_id}.png"
29
  annotated_path = f"annotated_image_{unique_id}.png"
 
30
  image.save(original_path)
31
+
32
  if session.annotated_image:
33
  session.annotated_image.save(annotated_path)
34
+
35
+ # Build Markdown reply
36
+ markdown_reply = f"**{message}**\n\n"
37
+ # Uncomment if you want images/audio previews inside Markdown
38
+ # markdown_reply += f"![Original Image](file/{original_path})\n\n"
39
+ # markdown_reply += f"![Annotated Image](file/{annotated_path})\n\n"
40
+ # markdown_reply += f"<audio controls autoplay><source src='file/{answer_audio}' type='audio/wav'></audio>"
41
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  return markdown_reply, answer_audio
43
+
44
+ except ValueError as e:
45
+ return f"Error: {str(e)}", None
46
 
47
 
48
  # --- Gradio App ---
 
53
  with gr.Column():
54
  image_input = gr.Image(label="Upload or Capture Image", sources=["upload", "webcam"], type="pil")
55
  audio_input = gr.Audio(label="Ask a Question (Voice)", sources=["microphone"], type="filepath")
 
56
  submit_btn = gr.Button("Submit Question")
57
 
58
  with gr.Column():
59
+ status_output = gr.Markdown(label="Response") # Use Markdown to format answers
60
  audio_output = gr.Audio(label="Audio Answer", interactive=False)
61
 
62
+ # Connect button to function
63
  submit_btn.click(
64
  fn=handle_question,
65
  inputs=[image_input, audio_input],
 
67
  )
68
 
69
  if __name__ == "__main__":
70
+ demo.launch(show_error=True, share=True)