akazmi committed on
Commit
6537e41
·
verified ·
1 Parent(s): 5af1eca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -17,15 +17,17 @@ def text_to_speech(text):
17
  # Speech-to-Text function
18
  def speech_to_text(audio):
19
  recognizer = sr.Recognizer()
20
- with sr.AudioFile(audio) as source:
21
- audio_data = recognizer.record(source)
22
- try:
23
  text = recognizer.recognize_google(audio_data)
24
  return text
25
- except sr.UnknownValueError:
26
- return "Sorry, I could not understand the audio."
27
- except sr.RequestError as e:
28
- return f"Could not request results; {e}"
 
 
29
 
30
  # Image Description function
31
  def generate_image_description(image):
@@ -56,33 +58,53 @@ def generate_video_description(video):
56
  frame_count += 1
57
 
58
  cap.release()
59
- return descriptions
60
 
61
  # Gradio Interface
62
  def main():
63
  with gr.Blocks() as app:
64
  gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
65
 
66
- # Text-to-Speech
 
 
 
 
 
67
  with gr.Row():
68
  text_input = gr.Textbox(label="Enter text for Text-to-Speech")
69
  tts_button = gr.Button("Convert to Speech")
70
  tts_output = gr.Audio(label="TTS Output")
71
  tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
72
 
73
- # Speech-to-Text
 
 
 
 
 
74
  stt_input = gr.Audio(label="Record Audio", type="filepath")
75
  stt_button = gr.Button("Convert Speech to Text")
76
  stt_output = gr.Textbox(label="Speech-to-Text Output")
77
  stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
78
 
79
- # Image Description
 
 
 
 
 
80
  image_input = gr.Image(label="Upload an Image")
81
  image_desc_output = gr.Textbox(label="Image Description")
82
  image_desc_button = gr.Button("Describe Image")
83
  image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
84
 
85
- # Video Description
 
 
 
 
 
86
  video_input = gr.File(label="Upload a Video")
87
  video_desc_output = gr.Textbox(label="Video Descriptions")
88
  video_desc_button = gr.Button("Describe Video")
 
17
  # Speech-to-Text function
18
  def speech_to_text(audio):
19
  recognizer = sr.Recognizer()
20
+ try:
21
+ with sr.AudioFile(audio) as source:
22
+ audio_data = recognizer.record(source)
23
  text = recognizer.recognize_google(audio_data)
24
  return text
25
+ except sr.UnknownValueError:
26
+ return "Sorry, I could not understand the audio."
27
+ except sr.RequestError as e:
28
+ return f"Could not request results; {e}"
29
+ except Exception as e:
30
+ return f"An error occurred: {e}"
31
 
32
  # Image Description function
33
  def generate_image_description(image):
 
58
  frame_count += 1
59
 
60
  cap.release()
61
+ return descriptions if descriptions else ["No frames to describe."]
62
 
63
  # Gradio Interface
64
  def main():
65
  with gr.Blocks() as app:
66
  gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
67
 
68
+ # Text-to-Speech Section
69
+ gr.Markdown("<h2>Text-to-Speech</h2>")
70
+ gr.Markdown("**Core Idea:** Create natural-sounding speech from text input.\n"
71
+ "**Functionality:** Converts written text into spoken words, helping individuals with reading difficulties or visual impairments.\n"
72
+ "**Target Audience:** People with visual impairments, reading disabilities, and those who prefer audio content.")
73
+ gr.Markdown("Supported Input: **Plain text**. \nOutput: **MP3 audio file**.")
74
  with gr.Row():
75
  text_input = gr.Textbox(label="Enter text for Text-to-Speech")
76
  tts_button = gr.Button("Convert to Speech")
77
  tts_output = gr.Audio(label="TTS Output")
78
  tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
79
 
80
+ # Speech-to-Text Section
81
+ gr.Markdown("<h2>Speech-to-Text</h2>")
82
+ gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
83
+ "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
84
+ "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
85
+ gr.Markdown("Supported Input: **WAV, FLAC, AIFF (or Microphone Input)**. \nOutput: **Transcribed text**.")
86
  stt_input = gr.Audio(label="Record Audio", type="filepath")
87
  stt_button = gr.Button("Convert Speech to Text")
88
  stt_output = gr.Textbox(label="Speech-to-Text Output")
89
  stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
90
 
91
+ # Image Description Section
92
+ gr.Markdown("<h2>Image Description</h2>")
93
+ gr.Markdown("**Core Idea:** Generate descriptive text for images.\n"
94
+ "**Functionality:** Analyzes and describes the content of images, making visual information accessible to those who are visually impaired.\n"
95
+ "**Target Audience:** Individuals with visual impairments and those needing assistance in understanding visual content.")
96
+ gr.Markdown("Supported Input: **JPEG, PNG, BMP, GIF**. \nOutput: **Text description**.")
97
  image_input = gr.Image(label="Upload an Image")
98
  image_desc_output = gr.Textbox(label="Image Description")
99
  image_desc_button = gr.Button("Describe Image")
100
  image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
101
 
102
+ # Video Description Section
103
+ gr.Markdown("<h2>Video Description</h2>")
104
+ gr.Markdown("**Core Idea:** Describe video content through generated text.\n"
105
+ "**Functionality:** Provides textual descriptions of video frames, aiding understanding for those who cannot see the video.\n"
106
+ "**Target Audience:** Individuals with visual impairments and those needing assistance in interpreting video content.")
107
+ gr.Markdown("Supported Input: **MP4, AVI, MKV**. \nOutput: **List of text descriptions**.")
108
  video_input = gr.File(label="Upload a Video")
109
  video_desc_output = gr.Textbox(label="Video Descriptions")
110
  video_desc_button = gr.Button("Describe Video")