# ReadabilityTest / app.py
# Author: NLPV
# Commit: "Update app.py" (ea23e90, verified)
import gradio as gr
from gtts import gTTS
import tempfile
import difflib
import pandas as pd
from Levenshtein import distance as lev_distance
import whisper
import string
# Load the Whisper model once, at module level, so every transcription call
# reuses the same instance. A different checkpoint name can be substituted,
# e.g.:
#   model = whisper.load_model("small")
model = whisper.load_model("large-v3")
def play_text(text):
    """Synthesize Hindi speech for *text* and return the path to an MP3 file.

    Args:
        text: The text to speak; it is handed to gTTS with ``lang='hi'``.

    Returns:
        The path of a temporary ``.mp3`` file containing the speech, or
        ``None`` when *text* is empty or whitespace-only (nothing to speak).
    """
    # Nothing to synthesize: return None instead of letting gTTS fail.
    if not text or not text.strip():
        return None

    tts = gTTS(text=text, lang='hi', slow=False)

    # Only a unique file name is needed here. Close the handle before gTTS
    # writes to that path so the descriptor is not leaked and the file can be
    # reopened by name on every platform. delete=False keeps the file on disk
    # for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        mp3_path = temp_file.name

    tts.save(mp3_path)
    return mp3_path
def get_error_type(asr_word, correct_word):
    """Label the kind of mismatch between a transcribed and an expected word.

    Args:
        asr_word: Word taken from the transcription; falsy when the reader
            produced nothing for this position.
        correct_word: Word taken from the expected text; falsy when the
            transcription has a word with no expected counterpart.

    Returns:
        A label string: a missing-word or extra-word label when one side is
        empty, a pronunciation label when the Levenshtein distance is at most
        2, a phonetic/matra label when the words share at least one character,
        and a substitution label otherwise.
    """
    if asr_word and correct_word:
        # Both words exist: grade how far apart they are.
        if lev_distance(asr_word, correct_word) <= 2:
            return "उच्चारण दोष (Pronunciation Errors) "
        has_common_char = not set(asr_word).isdisjoint(correct_word)
        if has_common_char:
            return "Phonetic/Matra error"
        return "Substitution/Distorted"

    # At least one side is empty; an empty transcribed word takes precedence.
    return "Missing word" if not asr_word else "अतिरिक्त शब्द"
def compare_hindi_sentences(expected, transcribed):
    """List the word-level differences between expected and transcribed text.

    Both strings are stripped of punctuation, split on whitespace and aligned
    with ``difflib.SequenceMatcher``; every non-matching region is reported.

    Args:
        expected: The reference text the reader was supposed to read.
        transcribed: The text produced by speech recognition.

    Returns:
        A list of ``(transcribed_word, expected_word, error_type)`` tuples.
        The transcribed word is ``""`` for a missing word and the expected
        word is ``""`` for an extra word.
    """
    # string.punctuation is ASCII-only, so also remove the Devanagari danda
    # (U+0964) and double danda (U+0965); otherwise a sentence-final word such
    # as "गया।" never matches the spoken "गया".
    strip_table = str.maketrans('', '', string.punctuation + "\u0964\u0965")
    expected_words = expected.translate(strip_table).split()
    transcribed_words = transcribed.translate(strip_table).split()

    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
    errors = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == "equal":
            continue
        if opcode == "replace":
            # The two sides of a replace region may differ in length; pair
            # words positionally and pad the shorter side with "".
            for k in range(max(i2 - i1, j2 - j1)):
                asr_word = transcribed_words[i1 + k] if i1 + k < i2 else ""
                correct_word = expected_words[j1 + k] if j1 + k < j2 else ""
                error_type = get_error_type(asr_word, correct_word)
                errors.append((asr_word, correct_word, error_type))
        elif opcode == "insert":
            # Words present only in the expected text were not read.
            errors.extend(
                ("", word, "Missing word") for word in expected_words[j1:j2]
            )
        elif opcode == "delete":
            # Words present only in the transcription were added by the reader.
            errors.extend(
                (word, "", "Extra word") for word in transcribed_words[i1:i2]
            )
    return errors
def calculate_accuracy(expected, transcribed):
    """Return the percentage of expected words found in the transcription.

    Both strings are stripped of punctuation and split on whitespace. The word
    lists are aligned with ``difflib.SequenceMatcher`` and the words inside
    "equal" regions are counted as correct.

    Args:
        expected: The reference text the reader was supposed to read.
        transcribed: The text produced by speech recognition.

    Returns:
        Accuracy as a percentage rounded to two decimals, or ``0`` when the
        expected text contains no words.
    """
    # Normalize BOTH sides the same way. If only the transcription is cleaned,
    # an expected word like "नमस्ते," can never match and accuracy is
    # under-reported. string.punctuation is ASCII-only, so the Devanagari
    # danda (U+0964) and double danda (U+0965) are removed explicitly.
    strip_table = str.maketrans('', '', string.punctuation + "\u0964\u0965")
    expected_words = expected.translate(strip_table).split()
    transcribed_words = transcribed.translate(strip_table).split()

    total = len(expected_words)
    if total == 0:
        return 0

    matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
    correct = sum(
        j2 - j1
        for tag, _i1, _i2, j1, j2 in matcher.get_opcodes()
        if tag == 'equal'
    )
    return round((correct / total) * 100, 2)
def transcribe_audio(audio_path, original_text):
    """Transcribe a recording and score it against the text that was read.

    Args:
        audio_path: Path of the audio file to transcribe; falsy when no
            recording was supplied.
        original_text: The reference text the reader was supposed to read.

    Returns:
        A ``(results, errors)`` tuple. ``results`` is a dict holding the
        transcription, the speaking speed in words per second and the reading
        accuracy in percent, or ``{"error": message}`` on failure. ``errors``
        is a DataFrame with one row per word-level error (empty on failure).
    """
    error_columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

    # Without a recording there is nothing to transcribe; report that clearly
    # instead of surfacing an opaque error from the transcription library.
    if not audio_path:
        return (
            {"error": "No audio provided. Please upload or record audio first."},
            pd.DataFrame(columns=error_columns),
        )

    try:
        # Use Whisper for transcription
        result = model.transcribe(audio_path, language='hi')
        transcription = result['text'].strip()

        # Error analysis
        errors = compare_hindi_sentences(original_text, transcription)
        df_errors = pd.DataFrame(errors, columns=error_columns)

        # Speaking speed: the 'end' value of the last segment is used as the
        # duration, falling back to 1.0 when no segments are returned.
        transcribed_words = transcription.split()
        segments = result.get('segments')
        duration = segments[-1]['end'] if segments else 1.0
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0

        # Accuracy
        accuracy = calculate_accuracy(original_text, transcription)

        result_dict = {
            "📝 Transcribed Text": transcription,
            "⏱️ Speaking Speed (words/sec)": speed,
            "✅ Reading Accuracy (%)": accuracy,
        }
        return result_dict, df_errors
    except Exception as e:
        # UI boundary: report the failure in the results panel rather than
        # raising it out of the event handler.
        return {"error": str(e)}, pd.DataFrame(columns=error_columns)
# Gradio UI: a text box with a text-to-speech preview, followed by an audio
# input whose recording is transcribed and scored against the entered text.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (OpenAI Whisper)")
    # NOTE(review): the original indentation was lost; this assumes the row
    # holds only the text box and the listen button — confirm the layout.
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")
    audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
    # play_text returns a file path, which feeds the filepath-typed audio output.
    play_button.click(play_text, inputs=input_text, outputs=audio_output)
    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    submit_button = gr.Button("✅ Submit Recording for Checking")
    output = gr.JSON(label="Results")
    error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
    # transcribe_audio receives (audio path, reference text) and returns
    # (results dict, error DataFrame), matching the two outputs below.
    submit_button.click(
        transcribe_audio,
        inputs=[audio_input, input_text],
        outputs=[output, error_table]
    )
# Runs at module level, so the app is launched as soon as this file is
# executed or imported.
app.launch()