# ReadabilityTest / app.py
# Last commit: "Update app.py" by NLPV (dec117d, verified); file size 4.98 kB.
import gradio as gr
from gtts import gTTS
import tempfile
import difflib
import pandas as pd
from Levenshtein import distance as lev_distance
import whisper
import string
# Load the Whisper model once at import time so that every transcription
# request reuses the same instance instead of reloading it.
# A smaller checkpoint can be loaded here instead, e.g.
# whisper.load_model("small").
model = whisper.load_model("large-v3")
def play_text(text):
    """Synthesize Hindi speech for *text* and return the path of the MP3 file.

    Args:
        text: Hindi text to read aloud.

    Returns:
        Path of a temporary ``.mp3`` file containing the synthesized speech,
        or ``None`` when *text* is empty or whitespace-only (there is nothing
        to speak, so no request is sent to the TTS service).

    The file is created with ``delete=False`` so that it outlives this call;
    the caller reads it afterwards via the returned path.
    """
    if not text or not text.strip():
        return None

    # Only the unique file name is needed. Close our own handle immediately so
    # the descriptor is not leaked and the path can be reopened for writing
    # (an open handle prevents that on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        mp3_path = temp_file.name

    tts = gTTS(text=text, lang='hi', slow=False)
    tts.save(mp3_path)
    return mp3_path
def get_error_type(asr_word, correct_word):
    """Label the kind of mismatch between a transcribed and an expected word.

    Args:
        asr_word: Word produced by the speech recognizer ("" if nothing was
            heard at this position).
        correct_word: Word from the reference text ("" if the reference has
            no word at this position).

    Returns:
        A label string: missing word, extra word, pronunciation error (edit
        distance of at most 2), phonetic/matra error (at least one shared
        character) or substitution/distorted (no shared character).
    """
    if asr_word and correct_word:
        # Both words exist: grade how far apart they are.
        if lev_distance(asr_word, correct_word) <= 2:
            return "उच्चारण दोष (Pronunciation Errors) "
        shares_characters = not set(asr_word).isdisjoint(correct_word)
        if shares_characters:
            return "Phonetic/Matra error"
        return "Substitution/Distorted"

    # One side is empty. A missing transcription takes precedence, so two
    # empty inputs are reported as a missing word.
    return "अतिरिक्त शब्द" if asr_word else "Missing word"
def compare_hindi_sentences(expected, transcribed):
    """Align the transcription with the expected text and list word errors.

    Both texts are normalized identically before the comparison: ASCII
    punctuation and the Devanagari danda marks are removed, then the text is
    split on whitespace. Punctuation differences therefore never show up as
    reading errors.

    Args:
        expected: Reference text the reader was asked to read.
        transcribed: Text produced by the speech recognizer.

    Returns:
        A list of ``(asr_word, correct_word, error_type)`` tuples in alignment
        order. ``asr_word`` is "" for a word that was not read and
        ``correct_word`` is "" for a word that is not in the reference.
    """
    # string.punctuation is ASCII-only; the danda (U+0964) and double danda
    # (U+0965) are the usual Hindi sentence terminators and must be added.
    strip_punct = str.maketrans('', '', string.punctuation + '\u0964\u0965')
    expected_words = expected.translate(strip_punct).split()
    transcribed_words = transcribed.translate(strip_punct).split()

    # autojunk=False: by default SequenceMatcher treats words that make up
    # more than 1% of a 200+ word sequence as junk, which would misalign
    # frequent words in longer passages.
    matcher = difflib.SequenceMatcher(
        None, transcribed_words, expected_words, autojunk=False
    )

    errors = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == "replace":
            # The two spans may differ in length; pad the shorter one with "".
            for k in range(max(i2 - i1, j2 - j1)):
                asr_word = transcribed_words[i1 + k] if i1 + k < i2 else ""
                correct_word = expected_words[j1 + k] if j1 + k < j2 else ""
                error_type = get_error_type(asr_word, correct_word)
                errors.append((asr_word, correct_word, error_type))
        elif opcode == "insert":
            # Present in the reference only: the reader skipped these words.
            for k in range(j1, j2):
                errors.append(("", expected_words[k], "Missing word"))
        elif opcode == "delete":
            # Present in the transcription only: the reader added these words.
            for k in range(i1, i2):
                errors.append((transcribed_words[k], "", "Extra word"))
        # "equal" spans are correct and produce no entry.
    return errors
def calculate_accuracy(expected, transcribed):
    """Return the percentage of expected words that were read correctly.

    Both texts are normalized before the comparison (ASCII punctuation and
    the Devanagari danda marks are removed, then the text is split on
    whitespace) so that punctuation does not lower the score.

    Args:
        expected: Reference text the reader was asked to read.
        transcribed: Text produced by the speech recognizer.

    Returns:
        Accuracy in percent, rounded to two decimals; 0 when the reference
        contains no words.
    """
    # string.punctuation is ASCII-only; add the danda (U+0964) and double
    # danda (U+0965) used to end Hindi sentences.
    strip_punct = str.maketrans('', '', string.punctuation + '\u0964\u0965')
    expected_words = expected.translate(strip_punct).split()
    transcribed_words = transcribed.translate(strip_punct).split()

    total = len(expected_words)
    if total == 0:
        return 0

    # autojunk=False keeps frequent words from being ignored when aligning
    # passages of 200 words or more.
    matcher = difflib.SequenceMatcher(
        None, transcribed_words, expected_words, autojunk=False
    )
    correct = sum(
        j2 - j1
        for tag, _i1, _i2, j1, j2 in matcher.get_opcodes()
        if tag == 'equal'
    )
    return round(correct / total * 100, 2)
def transcribe_audio(audio_path, original_text):
    """Transcribe a recording and score it against the reference text.

    Args:
        audio_path: Path of the audio file to transcribe; may be empty/None
            when nothing was uploaded or recorded.
        original_text: Reference text the reader was asked to read.

    Returns:
        A ``(results, errors)`` tuple. ``results`` is a dict with the
        transcription, the speaking speed in words per second and the reading
        accuracy in percent, or ``{"error": message}`` on failure. ``errors``
        is a DataFrame with one row per detected word error (empty on
        failure).
    """
    error_columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

    if not audio_path:
        # Nothing to transcribe: report it clearly instead of surfacing an
        # opaque exception message from the model.
        return (
            {"error": "No audio provided. Please upload or record audio first."},
            pd.DataFrame(columns=error_columns),
        )

    try:
        # Use Whisper for transcription
        result = model.transcribe(audio_path, language='hi')
        transcription = result['text'].strip()

        # Error analysis
        errors = compare_hindi_sentences(original_text, transcription)
        df_errors = pd.DataFrame(errors, columns=error_columns)

        # Speaking speed: strip punctuation from the *string* before
        # splitting (a list has no .translate), so that stand-alone
        # punctuation tokens are not counted as words.
        strip_punct = str.maketrans('', '', string.punctuation + '\u0964\u0965')
        transcribed_words = transcription.translate(strip_punct).split()
        segments = result.get('segments')
        # The end time of the last segment is used as the spoken duration;
        # fall back to 1.0 s when no segments are returned.
        duration = segments[-1]['end'] if segments else 1.0
        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0

        # Accuracy
        accuracy = calculate_accuracy(original_text, transcription)

        result_dict = {
            "📝 Transcribed Text": transcription,
            "⏱️ Speaking Speed (words/sec)": speed,
            "✅ Reading Accuracy (%)": accuracy,
        }
        return result_dict, df_errors
    except Exception as e:
        # UI boundary: report the failure in the results panel instead of
        # crashing the request.
        return {"error": str(e)}, pd.DataFrame(columns=error_columns)
# Build the Gradio interface: a text box with text-to-speech playback,
# followed by an audio input whose recording is checked against the text.
with gr.Blocks() as app:
    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (OpenAI Whisper)")
    # NOTE(review): the Row is assumed to contain only the text box and the
    # listen button — confirm against the intended layout.
    with gr.Row():
        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
        play_button = gr.Button("🔊 Listen to Text")
    audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
    # Clicking the listen button passes the text to play_text and sends the
    # returned file path to the audio player.
    play_button.click(play_text, inputs=input_text, outputs=audio_output)
    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
    submit_button = gr.Button("✅ Submit Recording for Checking")
    output = gr.JSON(label="Results")
    error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
    # Clicking submit passes the recording and the reference text to
    # transcribe_audio; its two return values fill the JSON panel and the
    # error table, in that order.
    submit_button.click(
        transcribe_audio,
        inputs=[audio_input, input_text],
        outputs=[output, error_table]
    )
# Start the app as soon as this module is executed.
app.launch()