speech_recognize

Runtime error

App Files Files Community

mr2along committed on Oct 23, 2024

Commit

006f012

verified ·

1 Parent(s): 52415e2

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -67

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import requests
 import speech_recognition as sr
@@ -9,11 +10,17 @@ from pydub import AudioSegment
 import time
 import eng_to_ipa as ipa
-# Create audio directory if it doesn't exist
-if not os.path.exists('audio'):
-    os.makedirs('audio')
-# Step 2: Create pronunciation audio for incorrect words
 def upfilepath(local_filename):
     ts = time.time()
     upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
@@ -21,72 +28,18 @@ def upfilepath(local_filename):
     try:
         response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
         if response.status_code == 200:
             result = response.json()
             extracted_path = result[0]
             return extracted_path
         else:
             return None
     except requests.exceptions.Timeout:
         return "Request timed out. Please try again."
     except Exception as e:
         return f"An error occurred: {e}"
-# Step 1: Transcribe the audio file
-def transcribe_audio(audio):
-    if audio is None:
-        return "No audio file provided."
-    recognizer = sr.Recognizer()
-    # Check if the file exists
-    if not os.path.isfile(audio):
-        return "Audio file not found."
-    audio_format = audio.split('.')[-1].lower()
-    if audio_format != 'wav':
-        try:
-            audio_segment = AudioSegment.from_file(audio)
-            wav_path = audio.replace(audio_format, 'wav')
-            audio_segment.export(wav_path, format='wav')
-            audio = wav_path
-        except Exception as e:
-            return f"Error converting audio: {e}"
-    audio_file = sr.AudioFile(audio)
-    with audio_file as source:
-        audio_data = recognizer.record(source)
-    try:
-        transcription = recognizer.recognize_google(audio_data)
-        return transcription
-    except sr.UnknownValueError:
-        return "Google Speech Recognition could not understand the audio."
-    except sr.RequestError as e:
-        return f"Error with Google Speech Recognition service: {e}"
-# Function to get IPA transcription
-def ipa_transcription(sentence):
-    try:
-        ipa_text = ipa.convert(sentence)
-        return ipa_text
-    except Exception as e:
-        return f"Error during IPA transcription: {e}"
-# Step 2: Create pronunciation audio for incorrect words (locally)
-def create_pronunciation_audio(word):
-    try:
-        tts = gTTS(word)
-        audio_file_path = f"audio/{word}.mp3"
-        tts.save(audio_file_path)
-        return audio_file_path  # Return the local path instead of uploading
-    except Exception as e:
-        return f"Failed to create pronunciation audio: {e}"
-# Step 3: Compare the transcribed text with the input paragraph
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
@@ -95,7 +48,7 @@ def compare_texts(reference_text, transcribed_text):
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
-    # Construct HTML output with detailed fidelity class
     html_output = f"<strong>Fidelity Class:</strong> "
     if similarity_score >= 85:
         html_output += f"<strong>GOOD (>=85%)</strong><br>"
@@ -108,10 +61,10 @@ def compare_texts(reference_text, transcribed_text):
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
-    html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"  # Display IPA transcription
     html_output += "<strong>Word Score List:</strong><br>"
-    # Generate colored word score list
     for i, word in enumerate(reference_words):
         try:
             if word.lower() == transcribed_words[i].lower():
@@ -132,15 +85,14 @@ def compare_texts(reference_text, transcribed_text):
     if incorrect_words_audios:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
-            suggestion = difflib.get_close_matches(word, reference_words, n=1)
-            suggestion_text = f" (Did you mean: <em>{suggestion[0]}</em>?)" if suggestion else ""
-            up_audio = upfilepath(audio)
-            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"
             html_output += f'{word}: '
-            html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio>{suggestion_text}<br>'
     return [html_output]
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     if not paragraph:

+# Import required libraries
 import os
 import requests
 import speech_recognition as sr
 import time
 import eng_to_ipa as ipa
+# Function to create pronunciation audio
+def create_pronunciation_audio(word):
+    """Synthesize speech for ``word`` with gTTS and save it as an MP3 file.
+
+    Args:
+        word: Text to synthesize; it is also used verbatim as the file name
+            (``audio/{word}.mp3``).
+            NOTE(review): a word containing a path separator would escape
+            the ``audio`` directory -- confirm callers only pass single words.
+
+    Returns:
+        The local path of the saved MP3 on success. On any failure, a string
+        starting with ``"Failed to create pronunciation audio:"`` is returned
+        instead of raising, so callers must not assume the result is a path.
+    """
+    try:
+        # The module-level "create audio directory" guard is no longer
+        # present, so make sure the target directory exists before saving.
+        os.makedirs("audio", exist_ok=True)
+        tts = gTTS(word)  # gTTS is presumably imported in a part of the file not shown here
+        audio_file_path = f"audio/{word}.mp3"
+        tts.save(audio_file_path)
+        return audio_file_path  # Return the local path instead of uploading
+    except Exception as e:
+        return f"Failed to create pronunciation audio: {e}"
+# Function to upload audio files to the server
 def upfilepath(local_filename):
     ts = time.time()
     upload_url = f"https://mr2along-speech-recognize.hf.space/gradio_api/upload?upload_id={ts}"
     try:
         response = requests.post(upload_url, files=files, timeout=30)  # Set timeout (e.g., 30 seconds)
         if response.status_code == 200:
             result = response.json()
             extracted_path = result[0]
             return extracted_path
         else:
             return None
     except requests.exceptions.Timeout:
         return "Request timed out. Please try again."
     except Exception as e:
         return f"An error occurred: {e}"
+# Compare the reference text with the transcribed text and build the HTML report
 def compare_texts(reference_text, transcribed_text):
     reference_words = reference_text.split()
     transcribed_words = transcribed_text.split()
     sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
     similarity_score = round(sm.ratio() * 100, 2)
+    # Construct HTML output
     html_output = f"<strong>Fidelity Class:</strong> "
     if similarity_score >= 85:
         html_output += f"<strong>GOOD (>=85%)</strong><br>"
     html_output += f"<strong>Quality Score:</strong> {similarity_score}%<br>"
     html_output += f"<strong>Transcribed Text:</strong> {transcribed_text}<br>"
+    html_output += f"<strong>IPA Transcription:</strong> {ipa_transcription(reference_text)}<br>"
     html_output += "<strong>Word Score List:</strong><br>"
+    # Generate colored word score list and audio links
     for i, word in enumerate(reference_words):
         try:
             if word.lower() == transcribed_words[i].lower():
     if incorrect_words_audios:
         html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
         for word, audio in incorrect_words_audios:
+            up_audio = upfilepath(audio)  # Upload the audio
+            audio_src = f"https://mr2along-speech-recognize.hf.space/gradio_api/file={up_audio}"  # Use the upload URL
             html_output += f'{word}: '
+            html_output += f'<audio controls><source src="{audio_src}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
     return [html_output]
 # Step 4: Text-to-Speech Function
 def text_to_speech(paragraph):
     if not paragraph: