Spaces:
Runtime error
Runtime error
#@markdown Language Application: WER (word error rate), fluency (number of pauses), WPM (words per minute)
| import gradio as gr | |
| import speech_recognition as sr | |
| from Levenshtein import distance as lev_distance, ratio | |
| import tempfile | |
| import soundfile as sf | |
| import librosa | |
def analyze_speech(file_info, *, top_db=32, min_pause_s=0.5):
    """Transcribe a recording and measure simple fluency statistics.

    Args:
        file_info: ``(sample_rate, samples)`` pair, i.e. the value delivered by
            the ``gr.Audio(type="numpy")`` input of this app.
        top_db: Threshold (in dB below the peak) passed to
            ``librosa.effects.split``; quieter audio is treated as silence.
        min_pause_s: Minimum length, in seconds, of a silent gap between two
            voiced segments for it to be counted as a pause.

    Returns:
        A tuple ``(text, num_pauses, duration, word_count)`` where ``text`` is
        the transcript from ``recognize_google``, ``num_pauses`` the number of
        silent gaps longer than ``min_pause_s``, ``duration`` the clip length
        in seconds and ``word_count`` the number of whitespace-separated words
        in ``text``.

    Raises:
        ValueError: If ``file_info`` is ``None`` (no audio was supplied).

    Exceptions raised by ``recognize_google`` (for example when the speech is
    unintelligible or the service cannot be reached) are not caught here.
    """
    import os  # Local import: only needed to build the temporary file path.

    if file_info is None:
        raise ValueError("No audio was provided.")
    sample_rate, samples = file_info

    recognizer = sr.Recognizer()

    # A temporary directory (rather than an open NamedTemporaryFile) lets the
    # audio libraries reopen the file by name on every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "speech.wav")
        # Use the recording's own sample rate; a fixed 44100 Hz would distort
        # the duration (and therefore WPM and pause lengths) of other inputs.
        sf.write(wav_path, data=samples, samplerate=sample_rate, format="WAV")

        # Load at the original sampling rate for duration and pause analysis.
        y, sr_lib = librosa.load(wav_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr_lib)

        # librosa.effects.split returns the NON-silent intervals, so a pause
        # is the gap between the end of one interval and the start of the next.
        voiced = librosa.effects.split(y, top_db=top_db)
        num_pauses = sum(
            1
            for (_, prev_end), (next_start, _) in zip(voiced, voiced[1:])
            if (next_start - prev_end) / sr_lib > min_pause_s
        )

        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)

    text = recognizer.recognize_google(audio_data)
    return text, num_pauses, duration, len(text.split())
def _strip_edge_punctuation(word):
    """Return *word* without leading/trailing non-alphanumeric characters."""
    start, end = 0, len(word)
    while start < end and not word[start].isalnum():
        start += 1
    while end > start and not word[end - 1].isalnum():
        end -= 1
    return word[start:end]


def _wer_tokens(text):
    """Split *text* on whitespace into words, dropping edge punctuation."""
    stripped = (_strip_edge_punctuation(word) for word in text.split())
    return [word for word in stripped if word]


def _word_edit_distance(ref_words, hyp_words):
    """Return the Levenshtein distance between two word sequences."""
    previous = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        current = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            current.append(
                min(
                    previous[j] + 1,  # deletion
                    current[j - 1] + 1,  # insertion
                    previous[j - 1] + (ref_word != hyp_word),  # substitution
                )
            )
        previous = current
    return previous[-1]


def calculate_wer(reference, hypothesis):
    """Return the word error rate (WER) of *hypothesis* against *reference*.

    WER is the word-level edit distance (insertions, deletions and
    substitutions, each costing 1) divided by the number of reference words.
    Punctuation at the edges of a word is ignored, so a reference such as
    ``"hello, world."`` matches an unpunctuated transcript ``"hello world"``.
    The comparison is case-sensitive; callers normalise case themselves.

    The distance is computed on word lists with a small dynamic-programming
    routine so the result does not depend on whether the installed
    Levenshtein package accepts non-string sequences.

    Args:
        reference: The expected text.
        hypothesis: The recognised text.

    Returns:
        The WER as a float, or ``float('inf')`` when the reference contains no
        words (avoids a division by zero).
    """
    ref_words = _wer_tokens(reference)
    hyp_words = _wer_tokens(hypothesis)
    if not ref_words:
        return float('inf')
    return _word_edit_distance(ref_words, hyp_words) / len(ref_words)
def pronunciation_correction(expected_text, file_info):
    """Compare a spoken recording against the expected text.

    Args:
        expected_text: The text the speaker was supposed to read.
        file_info: Audio value from the Gradio audio input, passed unchanged
            to ``analyze_speech``; ``None`` when no audio was supplied.

    Returns:
        A ``(feedback, description)`` pair of strings. ``feedback`` is a
        qualitative rating derived from the character-level similarity of the
        two lowercased texts; ``description`` reports WER, number of pauses and
        words per minute. When the input is incomplete or the speech cannot be
        recognised, ``feedback`` holds an explanatory message and
        ``description`` is empty.
    """
    # Validate the inputs up front so the user gets a readable message
    # instead of a generic error from the UI.
    if not expected_text or not expected_text.strip():
        return "Please enter the text you are going to read.", ""
    if file_info is None:
        return "Please upload or record audio before checking pronunciation.", ""

    try:
        user_spoken_text, num_pauses, duration, total_words = analyze_speech(file_info)
    except sr.UnknownValueError:
        # The recogniser could not make out any speech in the recording.
        return "Could not understand the audio. Please speak clearly and try again.", ""
    except sr.RequestError as exc:
        # The recognition service could not be reached or rejected the request.
        return f"Speech recognition service is unavailable: {exc}", ""

    expected = expected_text.lower()
    spoken = user_spoken_text.lower()

    wer = calculate_wer(expected, spoken)
    wpm = total_words / (duration / 60) if duration > 0 else 0
    similarity = ratio(expected, spoken)

    # Thresholds are checked from best to worst; the first one met wins.
    ratings = (
        (0.9, "Excellent pronunciation!"),
        (0.7, "Good pronunciation!"),
        (0.5, "Needs improvement."),
    )
    feedback = next(
        (message for threshold, message in ratings if similarity >= threshold),
        "Poor pronunciation, try to focus more on clarity.",
    )

    description = f"WER: {wer:.2f}, Fluency: {num_pauses} pauses, {wpm:.0f} WPM"
    return feedback, description
# Gradio front end: the user supplies the expected text and a recording, then
# presses the button to receive a rating plus the detailed metrics.
with gr.Blocks() as app:
    # NOTE(review): the original indentation was lost; the Row is assumed to
    # hold only the two inputs. Confirm against the intended layout.
    with gr.Row():
        text_input = gr.Textbox(label="Enter or paste your text here")
        audio_input = gr.Audio(label="Upload Audio File", type="numpy")

    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_details = gr.Textbox(label="Detailed Metrics")

    # Wire the button to the scoring function: two inputs in, two text
    # outputs back.
    check_pronunciation_button.click(
        fn=pronunciation_correction,
        inputs=[text_input, audio_input],
        outputs=[pronunciation_feedback, pronunciation_details],
    )

# Start the web server as soon as the module is run.
app.launch(debug=True)