Update app.py
Browse files
app.py
CHANGED
|
@@ -17,15 +17,17 @@ def text_to_speech(text):
|
|
| 17 |
# Speech-to-Text function
|
| 18 |
def speech_to_text(audio):
    """Transcribe an audio file to text with Google's web speech recognizer.

    Args:
        audio: Path to an audio file that ``sr.AudioFile`` can open. May be
            empty/None when the caller has nothing to transcribe.

    Returns:
        The transcribed text, or a short message when no audio was supplied.

    Raises:
        sr.UnknownValueError: If the speech could not be understood.
        sr.RequestError: If the recognition service could not be reached.
    """
    # Nothing to transcribe: answer immediately instead of failing deeper in
    # the recognizer with an unrelated error.
    if not audio:
        return "No audio provided. Please record or upload an audio file."

    recognizer = sr.Recognizer()
    # The recognizer needs AudioData, not a path: read the whole file first.
    # (Previously `audio_data` was referenced without ever being defined,
    # which raised NameError on every call.)
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Image Description function
|
| 31 |
def generate_image_description(image):
|
|
@@ -56,33 +58,53 @@ def generate_video_description(video):
|
|
| 56 |
frame_count += 1
|
| 57 |
|
| 58 |
cap.release()
|
| 59 |
-
return descriptions
|
| 60 |
|
| 61 |
# Gradio Interface
|
| 62 |
def main():
|
| 63 |
with gr.Blocks() as app:
|
| 64 |
gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
|
| 65 |
|
| 66 |
-
# Text-to-Speech
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
with gr.Row():
|
| 68 |
text_input = gr.Textbox(label="Enter text for Text-to-Speech")
|
| 69 |
tts_button = gr.Button("Convert to Speech")
|
| 70 |
tts_output = gr.Audio(label="TTS Output")
|
| 71 |
tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
|
| 72 |
|
| 73 |
-
# Speech-to-Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
stt_input = gr.Audio(label="Record Audio", type="filepath")
|
| 75 |
stt_button = gr.Button("Convert Speech to Text")
|
| 76 |
stt_output = gr.Textbox(label="Speech-to-Text Output")
|
| 77 |
stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
|
| 78 |
|
| 79 |
-
# Image Description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
image_input = gr.Image(label="Upload an Image")
|
| 81 |
image_desc_output = gr.Textbox(label="Image Description")
|
| 82 |
image_desc_button = gr.Button("Describe Image")
|
| 83 |
image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
|
| 84 |
|
| 85 |
-
# Video Description
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
video_input = gr.File(label="Upload a Video")
|
| 87 |
video_desc_output = gr.Textbox(label="Video Descriptions")
|
| 88 |
video_desc_button = gr.Button("Describe Video")
|
|
|
|
| 17 |
# Speech-to-Text function
|
| 18 |
def speech_to_text(audio):
    """Transcribe an audio file to text with Google's web speech recognizer.

    Every failure is reported as a human-readable string rather than raised,
    so the result can always be shown directly in a text output.

    Args:
        audio: Path to an audio file that ``sr.AudioFile`` can open. May be
            empty/None when the user submits without recording anything
            (presumably what the UI passes in that case; verify against the
            Gradio version in use).

    Returns:
        The transcribed text on success, otherwise a message describing why
        transcription was not possible.
    """
    # Guard first: an empty submission is a user mistake, not an internal
    # error, so give a clear instruction instead of the generic fallback.
    if not audio:
        return "No audio provided. Please record or upload an audio file."

    recognizer = sr.Recognizer()
    try:
        # Read the full recording into memory; the file is closed before the
        # (potentially slow) network request below.
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service responded but could not make out any speech.
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        # The service was unreachable or rejected the request.
        return f"Could not request results; {e}"
    except Exception as e:
        # UI boundary: unreadable/unsupported files and anything unexpected
        # are surfaced as text instead of crashing the handler.
        return f"An error occurred: {e}"
|
| 31 |
|
| 32 |
# Image Description function
|
| 33 |
def generate_image_description(image):
|
|
|
|
| 58 |
frame_count += 1
|
| 59 |
|
| 60 |
cap.release()
|
| 61 |
+
return descriptions if descriptions else ["No frames to describe."]
|
| 62 |
|
| 63 |
# Gradio Interface
|
| 64 |
def main():
|
| 65 |
with gr.Blocks() as app:
|
| 66 |
gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
|
| 67 |
|
| 68 |
+
# Text-to-Speech Section
|
| 69 |
+
gr.Markdown("<h2>Text-to-Speech</h2>")
|
| 70 |
+
gr.Markdown("**Core Idea:** Create natural-sounding speech from text input.\n"
|
| 71 |
+
"**Functionality:** Converts written text into spoken words, helping individuals with reading difficulties or visual impairments.\n"
|
| 72 |
+
"**Target Audience:** People with visual impairments, reading disabilities, and those who prefer audio content.")
|
| 73 |
+
gr.Markdown("Supported Input: **Plain text**. \nOutput: **MP3 audio file**.")
|
| 74 |
with gr.Row():
|
| 75 |
text_input = gr.Textbox(label="Enter text for Text-to-Speech")
|
| 76 |
tts_button = gr.Button("Convert to Speech")
|
| 77 |
tts_output = gr.Audio(label="TTS Output")
|
| 78 |
tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
|
| 79 |
|
| 80 |
+
# Speech-to-Text Section
|
| 81 |
+
gr.Markdown("<h2>Speech-to-Text</h2>")
|
| 82 |
+
gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
|
| 83 |
+
"**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
|
| 84 |
+
"**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
|
| 85 |
+
gr.Markdown("Supported Input: **WAV, FLAC, AIFF (or Microphone Input)**. \nOutput: **Transcribed text**.")
|
| 86 |
stt_input = gr.Audio(label="Record Audio", type="filepath")
|
| 87 |
stt_button = gr.Button("Convert Speech to Text")
|
| 88 |
stt_output = gr.Textbox(label="Speech-to-Text Output")
|
| 89 |
stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
|
| 90 |
|
| 91 |
+
# Image Description Section
|
| 92 |
+
gr.Markdown("<h2>Image Description</h2>")
|
| 93 |
+
gr.Markdown("**Core Idea:** Generate descriptive text for images.\n"
|
| 94 |
+
"**Functionality:** Analyzes and describes the content of images, making visual information accessible to those who are visually impaired.\n"
|
| 95 |
+
"**Target Audience:** Individuals with visual impairments and those needing assistance in understanding visual content.")
|
| 96 |
+
gr.Markdown("Supported Input: **JPEG, PNG, BMP, GIF**. \nOutput: **Text description**.")
|
| 97 |
image_input = gr.Image(label="Upload an Image")
|
| 98 |
image_desc_output = gr.Textbox(label="Image Description")
|
| 99 |
image_desc_button = gr.Button("Describe Image")
|
| 100 |
image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
|
| 101 |
|
| 102 |
+
# Video Description Section
|
| 103 |
+
gr.Markdown("<h2>Video Description</h2>")
|
| 104 |
+
gr.Markdown("**Core Idea:** Describe video content through generated text.\n"
|
| 105 |
+
"**Functionality:** Provides textual descriptions of video frames, aiding understanding for those who cannot see the video.\n"
|
| 106 |
+
"**Target Audience:** Individuals with visual impairments and those needing assistance in interpreting video content.")
|
| 107 |
+
gr.Markdown("Supported Input: **MP4, AVI, MKV**. \nOutput: **List of text descriptions**.")
|
| 108 |
video_input = gr.File(label="Upload a Video")
|
| 109 |
video_desc_output = gr.Textbox(label="Video Descriptions")
|
| 110 |
video_desc_button = gr.Button("Describe Video")
|