Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
|
|
| 6 |
import torch
|
| 7 |
from PIL import Image
|
| 8 |
import cv2
|
|
|
|
| 9 |
|
| 10 |
# Text-to-Speech function
|
| 11 |
def text_to_speech(text):
|
|
@@ -17,6 +18,15 @@ def text_to_speech(text):
|
|
| 17 |
# Speech-to-Text function
|
| 18 |
def speech_to_text(audio):
|
| 19 |
recognizer = sr.Recognizer()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
| 21 |
with sr.AudioFile(audio) as source:
|
| 22 |
audio_data = recognizer.record(source)
|
|
@@ -82,8 +92,8 @@ def main():
|
|
| 82 |
gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
|
| 83 |
"**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
|
| 84 |
"**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
|
| 85 |
-
gr.Markdown("Supported Input: **WAV, FLAC, AIFF**. \nOutput: **Transcribed text**.")
|
| 86 |
-
stt_input = gr.Audio(label="Record Audio", type="filepath")
|
| 87 |
stt_button = gr.Button("Convert Speech to Text")
|
| 88 |
stt_output = gr.Textbox(label="Speech-to-Text Output")
|
| 89 |
stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
|
|
|
|
| 6 |
import torch
|
| 7 |
from PIL import Image
|
| 8 |
import cv2
|
| 9 |
+
from pydub import AudioSegment
|
| 10 |
|
| 11 |
# Text-to-Speech function
|
| 12 |
def text_to_speech(text):
|
|
|
|
| 18 |
# Speech-to-Text function
|
| 19 |
def speech_to_text(audio):
|
| 20 |
recognizer = sr.Recognizer()
|
| 21 |
+
|
| 22 |
+
# Check if the uploaded file is an MP3
|
| 23 |
+
if audio.endswith('.mp3'):
|
| 24 |
+
# Convert MP3 to WAV
|
| 25 |
+
audio_segment = AudioSegment.from_mp3(audio)
|
| 26 |
+
wav_file = "temp.wav"
|
| 27 |
+
audio_segment.export(wav_file, format="wav")
|
| 28 |
+
audio = wav_file # Update audio to the converted file
|
| 29 |
+
|
| 30 |
try:
|
| 31 |
with sr.AudioFile(audio) as source:
|
| 32 |
audio_data = recognizer.record(source)
|
|
|
|
| 92 |
gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
|
| 93 |
"**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
|
| 94 |
"**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
|
| 95 |
+
gr.Markdown("Supported Input: **WAV, FLAC, AIFF, MP3 (converted to WAV)**. \nOutput: **Transcribed text**.")
|
| 96 |
+
stt_input = gr.Audio(label="Record or Upload Audio", type="filepath")
|
| 97 |
stt_button = gr.Button("Convert Speech to Text")
|
| 98 |
stt_output = gr.Textbox(label="Speech-to-Text Output")
|
| 99 |
stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
|