akazmi committed on
Commit
17ee1ec
·
verified ·
1 Parent(s): a39b8cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -6,6 +6,7 @@ from transformers import BlipProcessor, BlipForConditionalGeneration
6
  import torch
7
  from PIL import Image
8
  import cv2
 
9
 
10
  # Text-to-Speech function
11
  def text_to_speech(text):
@@ -17,6 +18,15 @@ def text_to_speech(text):
17
  # Speech-to-Text function
18
  def speech_to_text(audio):
19
  recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
20
  try:
21
  with sr.AudioFile(audio) as source:
22
  audio_data = recognizer.record(source)
@@ -82,8 +92,8 @@ def main():
82
  gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
83
  "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
84
  "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
85
- gr.Markdown("Supported Input: **WAV, FLAC, AIFF (or Microphone Input)**. \nOutput: **Transcribed text**.")
86
- stt_input = gr.Audio(label="Record Audio", type="filepath")
87
  stt_button = gr.Button("Convert Speech to Text")
88
  stt_output = gr.Textbox(label="Speech-to-Text Output")
89
  stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
 
6
  import torch
7
  from PIL import Image
8
  import cv2
9
+ from pydub import AudioSegment
10
 
11
  # Text-to-Speech function
12
  def text_to_speech(text):
 
18
  # Speech-to-Text function
19
  def speech_to_text(audio):
20
  recognizer = sr.Recognizer()
21
+
22
+ # Check if the uploaded file is an MP3
23
+ if audio.endswith('.mp3'):
24
+ # Convert MP3 to WAV
25
+ audio_segment = AudioSegment.from_mp3(audio)
26
+ wav_file = "temp.wav"
27
+ audio_segment.export(wav_file, format="wav")
28
+ audio = wav_file # Update audio to the converted file
29
+
30
  try:
31
  with sr.AudioFile(audio) as source:
32
  audio_data = recognizer.record(source)
 
92
  gr.Markdown("**Core Idea:** Convert spoken language into written text.\n"
93
  "**Functionality:** Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.\n"
94
  "**Target Audience:** Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.")
95
+ gr.Markdown("Supported Input: **WAV, FLAC, AIFF, MP3 (converted to WAV)**. \nOutput: **Transcribed text**.")
96
+ stt_input = gr.Audio(label="Record or Upload Audio", type="filepath")
97
  stt_button = gr.Button("Convert Speech to Text")
98
  stt_output = gr.Textbox(label="Speech-to-Text Output")
99
  stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)