speech_recognize

Runtime error

App Files Community

mr2along committed on Oct 11, 2024

Commit

0f433ab

verified ·

1 Parent(s): 14735a1

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -9

app.py CHANGED Viewed

@@ -8,6 +8,9 @@ from pydub import AudioSegment
 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
     recognizer = sr.Recognizer()
     audio_format = audio.split('.')[-1].lower()
@@ -43,14 +46,17 @@ def create_pronunciation_audio(word):
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
-    return audio_buffer
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
-    incorrect_words_audios = []  # Store audio buffers for incorrect words
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
@@ -72,10 +78,8 @@ def compare_texts(reference_text, transcribed_text):
                 # Incorrect words in red
                 html_output += f'<span style="color: red;">{word}</span> '
                 # Create pronunciation audio for the incorrect word
-                audio_buffer = create_pronunciation_audio(word)
-                # Encode the audio as base64 for playback
-                audio_base64 = audio_buffer.getvalue().hex()
-                incorrect_words_audios.append((word, audio_base64))
         except IndexError:
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
@@ -84,18 +88,23 @@ def compare_texts(reference_text, transcribed_text):
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
             html_output += f'{word}: '
-            # Return the audio buffer as part of the HTML output
-            html_output += f'<audio controls><source src="data:audio/mp3;base64,{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
     return html_output
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     tts = gTTS(paragraph)
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
-    return audio_buffer
 # Gradio Interface Function
 def gradio_function(paragraph, audio):

 # Step 1: Transcribe the audio file
 def transcribe_audio(audio):
+    if audio is None:
+        return "No audio file provided."  # Handle the case when no audio is uploaded
     recognizer = sr.Recognizer()
     audio_format = audio.split('.')[-1].lower()
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
+    audio_file_path = f"audio/{word}.mp3"  # Save the audio to a file
+    with open(audio_file_path, 'wb') as f:
+        f.write(audio_buffer.read())
+    return audio_file_path  # Return the file path instead of BytesIO
 # Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     word_scores = []
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
+    incorrect_words_audios = []  # Store audio paths for incorrect words
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
                 # Incorrect words in red
                 html_output += f'<span style="color: red;">{word}</span> '
                 # Create pronunciation audio for the incorrect word
+                audio_file_path = create_pronunciation_audio(word)
+                incorrect_words_audios.append((word, audio_file_path))
         except IndexError:
             html_output += f'<span style="color: red;">{word}</span> '  # Words in reference that were not transcribed
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
             html_output += f'{word}: '
+            html_output += f'<audio controls><source src="{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
     return html_output
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
+    if not paragraph:
+        return None  # Handle the case when no text is provided
     tts = gTTS(paragraph)
     audio_buffer = io.BytesIO()
     tts.save(audio_buffer)
     audio_buffer.seek(0)
+    audio_file_path = "audio/text_to_speech.mp3"  # Save the audio to a file
+    with open(audio_file_path, 'wb') as f:
+        f.write(audio_buffer.read())
+    return audio_file_path  # Return the file path instead of BytesIO
 # Gradio Interface Function
 def gradio_function(paragraph, audio):