Spaces:

akazmi
/

hackaton1

Sleeping

App Files Community

akazmi committed on Nov 5, 2024

Commit

6537e41

verified ·

1 Parent(s): 5af1eca

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -12

app.py CHANGED Viewed

@@ -17,15 +17,17 @@ def text_to_speech(text):
 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
-    with sr.AudioFile(audio) as source:
-        audio_data = recognizer.record(source)
-        try:
             text = recognizer.recognize_google(audio_data)
             return text
-        except sr.UnknownValueError:
-            return "Sorry, I could not understand the audio."
-        except sr.RequestError as e:
-            return f"Could not request results; {e}"
 # Image Description function
 def generate_image_description(image):
@@ -56,33 +58,53 @@ def generate_video_description(video):
         frame_count += 1
     cap.release()
-    return descriptions
 # Gradio Interface
 def main():
     with gr.Blocks() as app:
         gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
-        # Text-to-Speech
         with gr.Row():
             text_input = gr.Textbox(label="Enter text for Text-to-Speech")
             tts_button = gr.Button("Convert to Speech")
             tts_output = gr.Audio(label="TTS Output")
             tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
-        # Speech-to-Text
         stt_input = gr.Audio(label="Record Audio", type="filepath")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
-        # Image Description
         image_input = gr.Image(label="Upload an Image")
         image_desc_output = gr.Textbox(label="Image Description")
         image_desc_button = gr.Button("Describe Image")
         image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
-        # Video Description
         video_input = gr.File(label="Upload a Video")
         video_desc_output = gr.Textbox(label="Video Descriptions")
         video_desc_button = gr.Button("Describe Video")

 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
+    try:
+        with sr.AudioFile(audio) as source:
+            audio_data = recognizer.record(source)
             text = recognizer.recognize_google(audio_data)
             return text
+    except sr.UnknownValueError:
+        return "Sorry, I could not understand the audio."
+    except sr.RequestError as e:
+        return f"Could not request results; {e}"
+    except Exception as e:
+        return f"An error occurred: {e}"
 # Image Description function
 def generate_image_description(image):
         frame_count += 1
     cap.release()
+    return descriptions if descriptions else ["No frames to describe."]
 # Gradio Interface
 def main():
     with gr.Blocks() as app:
         gr.Markdown("<h1>AI-Powered Accessibility Tools</h1>")
+        # Text-to-Speech Section
+        gr.Markdown("<h2>Text-to-Speech</h2>")
+        gr.Markdown("**Core Idea:** Create natural-sounding speech from text input.\n"
+                    "**Functionality:** Converts written text into spoken words, helping individuals with reading difficulties or visual impairments.\n"
+                    "**Target Audience:** People with visual impairments, reading disabilities, and those who prefer audio content.")
+        gr.Markdown("Supported Input: **Plain text**. \nOutput: **MP3 audio file**.")
         with gr.Row():
             text_input = gr.Textbox(label="Enter text for Text-to-Speech")
             tts_button = gr.Button("Convert to Speech")
             tts_output = gr.Audio(label="TTS Output")
             tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
+        # Speech-to-Text Section
+        gr.Markdown("<h2>Speech-to-Text</h2>")
+        gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
+                    "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
+                    "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
+        gr.Markdown("Supported Input: **WAV, FLAC, AIFF (or Microphone Input)**. \nOutput: **Transcribed text**.")
         stt_input = gr.Audio(label="Record Audio", type="filepath")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
+        # Image Description Section
+        gr.Markdown("<h2>Image Description</h2>")
+        gr.Markdown("**Core Idea:** Generate descriptive text for images.\n"
+                    "**Functionality:** Analyzes and describes the content of images, making visual information accessible to those who are visually impaired.\n"
+                    "**Target Audience:** Individuals with visual impairments and those needing assistance in understanding visual content.")
+        gr.Markdown("Supported Input: **JPEG, PNG, BMP, GIF**. \nOutput: **Text description**.")
         image_input = gr.Image(label="Upload an Image")
         image_desc_output = gr.Textbox(label="Image Description")
         image_desc_button = gr.Button("Describe Image")
         image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
+        # Video Description Section
+        gr.Markdown("<h2>Video Description</h2>")
+        gr.Markdown("**Core Idea:** Describe video content through generated text.\n"
+                    "**Functionality:** Provides textual descriptions of video frames, aiding understanding for those who cannot see the video.\n"
+                    "**Target Audience:** Individuals with visual impairments and those needing assistance in interpreting video content.")
+        gr.Markdown("Supported Input: **MP4, AVI, MKV**. \nOutput: **List of text descriptions**.")
         video_input = gr.File(label="Upload a Video")
         video_desc_output = gr.Textbox(label="Video Descriptions")
         video_desc_button = gr.Button("Describe Video")