Spaces:

akazmi
/

hackaton1

Sleeping

akazmi committed on Nov 5, 2024

Commit

17ee1ec

verified ·

1 Parent(s): a39b8cb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
 from PIL import Image
 import cv2
 # Text-to-Speech function
 def text_to_speech(text):
@@ -17,6 +18,15 @@ def text_to_speech(text):
 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
     try:
         with sr.AudioFile(audio) as source:
             audio_data = recognizer.record(source)
@@ -82,8 +92,8 @@ def main():
         gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
                     "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
                     "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
-        gr.Markdown("Supported Input: **WAV, FLAC, AIFF (or Microphone Input)**. \nOutput: **Transcribed text**.")
-        stt_input = gr.Audio(label="Record Audio", type="filepath")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)

 import torch
 from PIL import Image
 import cv2
+from pydub import AudioSegment
 # Text-to-Speech function
 def text_to_speech(text):
 # Speech-to-Text function
 def speech_to_text(audio):
     recognizer = sr.Recognizer()
+    # Check if the uploaded file is an MP3
+    if audio.endswith('.mp3'):
+        # Convert MP3 to WAV
+        audio_segment = AudioSegment.from_mp3(audio)
+        wav_file = "temp.wav"
+        audio_segment.export(wav_file, format="wav")
+        audio = wav_file  # Update audio to the converted file
     try:
         with sr.AudioFile(audio) as source:
             audio_data = recognizer.record(source)
         gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
                     "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
                     "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
+        gr.Markdown("Supported Input: **WAV, FLAC, AIFF, MP3 (converted to WAV)**. \nOutput: **Transcribed text**.")
+        stt_input = gr.Audio(label="Record or Upload Audio", type="filepath")
         stt_button = gr.Button("Convert Speech to Text")
         stt_output = gr.Textbox(label="Speech-to-Text Output")
         stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)