speech_recognize

Runtime error

App Files Files Community

mr2along committed on Oct 11, 2024

Commit

3e9568e

verified ·

1 Parent(s): 999ce1a

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -6

app.py CHANGED Viewed

@@ -3,14 +3,28 @@ import difflib
 import gradio as gr
 from gtts import gTTS
 import io
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
     recognizer = sr.Recognizer()
     # Convert audio into recognizable format for the Recognizer
     audio_file = sr.AudioFile(audio)
     with audio_file as source:
         audio_data = recognizer.record(source)
@@ -25,7 +39,7 @@ def transcribe_audio(audio):
 # Step 2: Create pronunciation audio for incorrect words
 def create_pronunciation_audio(word):
-    tts = gTTS(word, lang='en')  # Specify the language for TTS
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
@@ -49,7 +63,7 @@ def compare_texts(reference_text, transcribed_text):
     # Generate colored word score list
     for i, word in enumerate(reference_words):
-        if i < len(transcribed_words):
             if word.lower() == transcribed_words[i].lower():
                 html_output += f'<span style="color: green;">{word}</span> '  # Correct words in green
             elif difflib.get_close_matches(word, transcribed_words):
@@ -62,8 +76,7 @@ def compare_texts(reference_text, transcribed_text):
                 # Encode the audio as base64 for playback
                 audio_base64 = audio_buffer.getvalue().hex()
                 incorrect_words_audios.append((word, audio_base64))
-        else:
-            # If reference word has no corresponding transcribed word
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
     # Provide audio for incorrect words
@@ -78,7 +91,7 @@ def compare_texts(reference_text, transcribed_text):
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
-    tts = gTTS(paragraph, lang='en')  # Specify the language for TTS
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)

 import gradio as gr
 from gtts import gTTS
 import io
+import os
+from pydub import AudioSegment
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
     recognizer = sr.Recognizer()
+    audio_format = audio.split('.')[-1].lower()
+    # Convert to WAV if the audio is not in a supported format
+    if audio_format != 'wav':
+        try:
+            # Load the audio file with pydub
+            audio_segment = AudioSegment.from_file(audio)
+            wav_path = audio.replace(audio_format, 'wav')
+            audio_segment.export(wav_path, format='wav')  # Convert to WAV
+            audio = wav_path  # Update audio path to the converted file
+        except Exception as e:
+            return f"Error converting audio: {e}"
     # Convert audio into recognizable format for the Recognizer
     audio_file = sr.AudioFile(audio)
     with audio_file as source:
         audio_data = recognizer.record(source)
 # Step 2: Create pronunciation audio for incorrect words
 def create_pronunciation_audio(word):
+    tts = gTTS(word)
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
     # Generate colored word score list
     for i, word in enumerate(reference_words):
+        try:
             if word.lower() == transcribed_words[i].lower():
                 html_output += f'<span style="color: green;">{word}</span> '  # Correct words in green
             elif difflib.get_close_matches(word, transcribed_words):
                 # Encode the audio as base64 for playback
                 audio_base64 = audio_buffer.getvalue().hex()
                 incorrect_words_audios.append((word, audio_base64))
+        except IndexError:
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
     # Provide audio for incorrect words
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
+    tts = gTTS(paragraph)
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)