Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from gtts import gTTS | |
| import tempfile | |
| import difflib | |
| import pandas as pd | |
| from Levenshtein import distance as lev_distance | |
| import whisper | |
| import string | |
| # Load the Whisper model once at import time; "large-v3" is used here, with "small" kept below as a commented-out alternative. | |
| #model = whisper.load_model("small") | |
| model = whisper.load_model("large-v3") | |
def play_text(text):
    """Synthesize Hindi speech for *text* and return the path of an MP3 file.

    Args:
        text: Hindi text to read aloud.

    Returns:
        Path of a temporary ``.mp3`` file holding the synthesized speech, or
        ``None`` when *text* is empty or whitespace-only (nothing to speak).
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    if not text or not text.strip():
        return None

    # Reserve a unique path and close the handle immediately. Keeping the
    # handle open leaks one file descriptor per call, and on Windows a path
    # cannot be opened a second time while the temporary file is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        audio_path = temp_file.name

    tts = gTTS(text=text, lang='hi', slow=False)
    tts.save(audio_path)
    return audio_path
def get_error_type(asr_word, correct_word):
    """Classify the mismatch between a recognized word and the expected word.

    Args:
        asr_word: Word produced by speech recognition ("" if none).
        correct_word: Word from the reference text ("" if none).

    Returns:
        A label string: "Missing word" when no word was recognized, the
        extra-word label when there is no reference word, the pronunciation
        label when the edit distance is at most 2, "Phonetic/Matra error"
        when the two words share at least one character, and
        "Substitution/Distorted" otherwise.
    """
    if asr_word and correct_word:
        # Both words present: grade by how far apart they are.
        if lev_distance(asr_word, correct_word) <= 2:
            return "उच्चारण दोष (Pronunciation Errors) "
        shared_chars = set(asr_word).intersection(correct_word)
        return "Phonetic/Matra error" if shared_chars else "Substitution/Distorted"

    # At least one side is empty; a missing recognized word takes priority.
    if not asr_word:
        return "Missing word"
    return "अतिरिक्त शब्द"
def compare_hindi_sentences(expected, transcribed):
    """List word-level differences between the reference text and the ASR text.

    Both texts are normalized the same way (punctuation removed, split on
    whitespace) and aligned with ``difflib.SequenceMatcher``.

    Args:
        expected: Reference text the speaker was supposed to read.
        transcribed: Text produced by speech recognition.

    Returns:
        A list of ``(asr_word, correct_word, error_type)`` tuples, one per
        differing word. ``asr_word`` is "" for a missing word and
        ``correct_word`` is "" for an extra word. Matching words produce no
        entry.
    """
    # string.punctuation is ASCII-only, so the Devanagari danda (U+0964) and
    # double danda (U+0965) must be handled explicitly; otherwise a
    # sentence-final word keeps its danda and never matches the transcription.
    # They are sentence terminators, so they become spaces rather than being
    # deleted, which keeps neighbouring words apart.
    cleanup = str.maketrans("\u0964\u0965", "  ", string.punctuation)
    expected_words = expected.translate(cleanup).split()
    transcribed_words = transcribed.translate(cleanup).split()

    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
    errors = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == "replace":
            # The two spans may differ in length; pad the shorter side with ""
            # so leftover words are reported as missing/extra.
            for k in range(max(i2 - i1, j2 - j1)):
                asr_word = transcribed_words[i1 + k] if i1 + k < i2 else ""
                correct_word = expected_words[j1 + k] if j1 + k < j2 else ""
                errors.append(
                    (asr_word, correct_word, get_error_type(asr_word, correct_word))
                )
        elif opcode == "insert":
            # Present in the reference but absent from the transcription.
            errors.extend(("", word, "Missing word") for word in expected_words[j1:j2])
        elif opcode == "delete":
            # Present in the transcription but absent from the reference.
            errors.extend((word, "", "Extra word") for word in transcribed_words[i1:i2])
    return errors
def calculate_accuracy(expected, transcribed):
    """Return the percentage of reference words that were read correctly.

    Both texts are stripped of punctuation and split on whitespace, then
    aligned with ``difflib.SequenceMatcher``; a reference word counts as
    correct when it falls inside an "equal" span of the alignment.

    Args:
        expected: Reference text the speaker was supposed to read.
        transcribed: Text produced by speech recognition.

    Returns:
        Accuracy in percent, rounded to two decimals, or 0 when the reference
        contains no words.
    """
    # Normalize BOTH sides identically. Stripping punctuation only from the
    # transcription meant a reference word such as "word," could never match.
    # The Devanagari danda / double danda are not in string.punctuation, so
    # they are mapped to spaces explicitly.
    cleanup = str.maketrans("\u0964\u0965", "  ", string.punctuation)
    expected_words = expected.translate(cleanup).split()
    transcribed_words = transcribed.translate(cleanup).split()

    total = len(expected_words)
    if total == 0:
        return 0

    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
    correct = sum(
        j2 - j1
        for tag, _i1, _i2, j1, j2 in matcher.get_opcodes()
        if tag == 'equal'
    )
    return round(correct / total * 100, 2)
def transcribe_audio(audio_path, original_text):
    """Transcribe a recording and score it against the reference text.

    Args:
        audio_path: Path of the audio file to transcribe; may be ``None`` or
            empty when nothing was uploaded or recorded.
        original_text: Reference text the speaker was supposed to read.

    Returns:
        A ``(result_dict, df_errors)`` tuple. On success ``result_dict`` holds
        the transcription, the speaking speed in words per second and the
        reading accuracy in percent, and ``df_errors`` has one row per word
        error. On failure ``result_dict`` is ``{"error": message}`` and
        ``df_errors`` is an empty table with the same three columns.
    """
    error_columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

    # Report a missing recording explicitly instead of letting the
    # transcription call fail with a library-internal error message.
    if not audio_path:
        return (
            {"error": "No audio provided. Please upload or record audio first."},
            pd.DataFrame(columns=error_columns),
        )

    try:
        # Use Whisper for transcription
        result = model.transcribe(audio_path, language='hi')
        transcription = result['text'].strip()

        # Error analysis
        errors = compare_hindi_sentences(original_text, transcription)
        df_errors = pd.DataFrame(errors, columns=error_columns)

        # Speaking speed: word count divided by the end time of the last
        # segment; falls back to 1.0 s when no segments are returned.
        transcribed_words = transcription.split()
        segments = result.get('segments')
        duration = segments[-1]['end'] if segments else 1.0
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0

        # Accuracy
        accuracy = calculate_accuracy(original_text, transcription)

        result_dict = {
            "📝 Transcribed Text": transcription,
            "⏱️ Speaking Speed (words/sec)": speed,
            "✅ Reading Accuracy (%)": accuracy,
        }
        return result_dict, df_errors
    except Exception as e:
        # UI boundary: turn any failure into an error payload for the caller
        # rather than propagating the exception.
        return {"error": str(e)}, pd.DataFrame(columns=error_columns)
# Gradio UI: a text box with text-to-speech playback, followed by an audio
# input whose recording is transcribed and checked against the entered text.
# NOTE(review): indentation was lost in this copy of the file, so which widgets
# belong inside gr.Row() cannot be determined here; confirm against the original.
| with gr.Blocks() as app: | |
| gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (OpenAI Whisper)") | |
| with gr.Row(): | |
| input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...") | |
| play_button = gr.Button("🔊 Listen to Text") | |
| audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath") | |
# Clicking the listen button passes the text box content to play_text and
# loads the returned file path into the audio player.
| play_button.click(play_text, inputs=input_text, outputs=audio_output) | |
| gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:") | |
| audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice") | |
| submit_button = gr.Button("✅ Submit Recording for Checking") | |
| output = gr.JSON(label="Results") | |
| error_table = gr.Dataframe(label="गलती तालिका (Error Table)") | |
# Submitting sends the recording path and the reference text to
# transcribe_audio; its two return values fill the JSON panel and error table.
| submit_button.click( | |
| transcribe_audio, | |
| inputs=[audio_input, input_text], | |
| outputs=[output, error_table] | |
| ) | |
# Start the Gradio app.
| app.launch() | |