Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,9 @@ from pydub import AudioSegment
|
|
| 8 |
|
| 9 |
# Step 1: Transcribe the audio file
|
| 10 |
def transcribe_audio(audio):
|
|
|
|
|
|
|
|
|
|
| 11 |
recognizer = sr.Recognizer()
|
| 12 |
audio_format = audio.split('.')[-1].lower()
|
| 13 |
|
|
@@ -43,14 +46,17 @@ def create_pronunciation_audio(word):
|
|
| 43 |
audio_buffer = io.BytesIO()
|
| 44 |
tts.save(audio_buffer)
|
| 45 |
audio_buffer.seek(0)
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# Step 3: Compare the transcribed text with the input paragraph
|
| 49 |
def compare_texts(reference_text, transcribed_text):
|
| 50 |
word_scores = []
|
| 51 |
reference_words = reference_text.split()
|
| 52 |
transcribed_words = transcribed_text.split()
|
| 53 |
-
incorrect_words_audios = [] # Store audio
|
| 54 |
|
| 55 |
sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
|
| 56 |
similarity_score = round(sm.ratio() * 100, 2)
|
|
@@ -72,10 +78,8 @@ def compare_texts(reference_text, transcribed_text):
|
|
| 72 |
# Incorrect words in red
|
| 73 |
html_output += f'<span style="color: red;">{word}</span> '
|
| 74 |
# Create pronunciation audio for the incorrect word
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
audio_base64 = audio_buffer.getvalue().hex()
|
| 78 |
-
incorrect_words_audios.append((word, audio_base64))
|
| 79 |
except IndexError:
|
| 80 |
html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
|
| 81 |
|
|
@@ -84,18 +88,23 @@ def compare_texts(reference_text, transcribed_text):
|
|
| 84 |
html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
|
| 85 |
for word, audio in incorrect_words_audios:
|
| 86 |
html_output += f'{word}: '
|
| 87 |
-
|
| 88 |
-
html_output += f'<audio controls><source src="data:audio/mp3;base64,{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
|
| 89 |
|
| 90 |
return html_output
|
| 91 |
|
| 92 |
# Step 4: Text-to-Speech Function
|
| 93 |
def text_to_speech(paragraph):
|
|
|
|
|
|
|
|
|
|
| 94 |
tts = gTTS(paragraph)
|
| 95 |
audio_buffer = io.BytesIO()
|
| 96 |
tts.save(audio_buffer)
|
| 97 |
audio_buffer.seek(0)
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
# Gradio Interface Function
|
| 101 |
def gradio_function(paragraph, audio):
|
|
|
|
| 8 |
|
| 9 |
# Step 1: Transcribe the audio file
|
| 10 |
def transcribe_audio(audio):
|
| 11 |
+
if audio is None:
|
| 12 |
+
return "No audio file provided." # Handle the case when no audio is uploaded
|
| 13 |
+
|
| 14 |
recognizer = sr.Recognizer()
|
| 15 |
audio_format = audio.split('.')[-1].lower()
|
| 16 |
|
|
|
|
| 46 |
audio_buffer = io.BytesIO()
|
| 47 |
tts.save(audio_buffer)
|
| 48 |
audio_buffer.seek(0)
|
| 49 |
+
audio_file_path = f"audio/{word}.mp3" # Save the audio to a file
|
| 50 |
+
with open(audio_file_path, 'wb') as f:
|
| 51 |
+
f.write(audio_buffer.read())
|
| 52 |
+
return audio_file_path # Return the file path instead of BytesIO
|
| 53 |
|
| 54 |
# Step 3: Compare the transcribed text with the input paragraph
|
| 55 |
def compare_texts(reference_text, transcribed_text):
|
| 56 |
word_scores = []
|
| 57 |
reference_words = reference_text.split()
|
| 58 |
transcribed_words = transcribed_text.split()
|
| 59 |
+
incorrect_words_audios = [] # Store audio paths for incorrect words
|
| 60 |
|
| 61 |
sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
|
| 62 |
similarity_score = round(sm.ratio() * 100, 2)
|
|
|
|
| 78 |
# Incorrect words in red
|
| 79 |
html_output += f'<span style="color: red;">{word}</span> '
|
| 80 |
# Create pronunciation audio for the incorrect word
|
| 81 |
+
audio_file_path = create_pronunciation_audio(word)
|
| 82 |
+
incorrect_words_audios.append((word, audio_file_path))
|
|
|
|
|
|
|
| 83 |
except IndexError:
|
| 84 |
html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
|
| 85 |
|
|
|
|
| 88 |
html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
|
| 89 |
for word, audio in incorrect_words_audios:
|
| 90 |
html_output += f'{word}: '
|
| 91 |
+
html_output += f'<audio controls><source src="{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
|
|
|
|
| 92 |
|
| 93 |
return html_output
|
| 94 |
|
| 95 |
# Step 4: Text-to-Speech Function
|
| 96 |
def text_to_speech(paragraph):
    """Synthesize *paragraph* to an MP3 file and return the file's path.

    Args:
        paragraph: Text to convert to speech. An empty string or ``None``
            means there is nothing to synthesize.

    Returns:
        The path ``"audio/text_to_speech.mp3"`` once the file has been
        written, or ``None`` when no text was provided.
    """
    if not paragraph:
        return None  # Handle the case when no text is provided

    # Local import so this function does not depend on the file's import block.
    import os

    audio_file_path = "audio/text_to_speech.mp3"  # Save the audio to a file

    # The output directory may not exist yet (e.g. on a fresh deployment);
    # opening a file inside a missing directory raises FileNotFoundError.
    os.makedirs(os.path.dirname(audio_file_path), exist_ok=True)

    tts = gTTS(paragraph)
    # gTTS.save() takes a destination path, not a file-like object, so write
    # straight to the target file instead of routing through a BytesIO buffer
    # (which was left empty and produced a zero-byte MP3).
    tts.save(audio_file_path)

    return audio_file_path  # Return the file path instead of BytesIO
|
| 108 |
|
| 109 |
# Gradio Interface Function
|
| 110 |
def gradio_function(paragraph, audio):
|