Spaces:

NLPV
/

ReadabilityTest

Sleeping

App Files Files Community

ReadabilityTest / app.py

NLPV

Update app.py

f0b2a66 verified 9 months ago

raw

history blame

4.62 kB

	import gradio as gr
	from gtts import gTTS
	import tempfile
	import os
	import torch
	import re
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
	import torchaudio
	import difflib
	import pandas as pd
	from Levenshtein import distance as lev_distance

	# Load the AI4Bharat Hindi wav2vec2 checkpoint and its processor once, at
	# import time, so every call in this module reuses the same objects.
	MODEL_NAME = "ai4bharat/indicwav2vec-hindi"
	# Processor: turns the raw waveform into model inputs and decodes predicted ids.
	processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
	# CTC model: its logits are argmax-decoded in transcribe_audio.
	model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

	def play_text(text):
	    """Synthesize *text* as Hindi speech and open it in the default audio player.

	    The speech is written to a temporary ``.mp3`` file that is deliberately not
	    deleted, because the external player needs it after this function returns.
	    Playback happens on the machine running this process.

	    Args:
	        text: The passage to read aloud.

	    Returns:
	        A human-readable status message.
	    """
	    import subprocess
	    import sys

	    # gTTS cannot speak an empty string; report it instead of failing.
	    if not text or not text.strip():
	        return "⚠️ Please enter some Hindi text first."

	    tts = gTTS(text=text, lang='hi', slow=False)

	    # Reserve a unique path, then close the handle before gTTS writes to it:
	    # the original left the handle open forever (a leak), and an open handle
	    # can block a second open of the same file on Windows.
	    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
	        audio_path = temp_file.name
	    tts.save(audio_path)

	    # Launch the platform's default opener without going through a shell.
	    try:
	        if sys.platform.startswith("win"):
	            os.startfile(audio_path)
	        elif sys.platform == "darwin":
	            subprocess.Popen(["open", audio_path])
	        else:
	            subprocess.Popen(["xdg-open", audio_path])
	    except OSError as exc:
	        # No opener available (e.g. a headless server): report, don't crash.
	        return f"⚠️ Audio was generated but could not be played: {exc}"

	    return "✅ Text is being read out. Please listen and read it yourself."

	def get_error_type(asr_word, correct_word):
	    """Label the mismatch between a transcribed word and the expected word.

	    The checks run in this order and the first one that applies wins:

	    1. no transcribed word             -> "Missing word"
	    2. no expected word                -> "Extra word"
	    3. Levenshtein distance of 2 or less -> "Spelling mistake"
	    4. at least one character in common -> "Phonetic/Matra error"
	    5. nothing in common               -> "Substitution/Distorted"

	    Args:
	        asr_word: The word produced by the recogniser (may be empty).
	        correct_word: The word from the reference text (may be empty).

	    Returns:
	        One of the five label strings listed above.
	    """
	    if not asr_word:
	        label = "Missing word"
	    elif not correct_word:
	        label = "Extra word"
	    elif lev_distance(asr_word, correct_word) <= 2:
	        # Only a couple of edits apart: treat as a near miss.
	        label = "Spelling mistake"
	    elif set(asr_word).isdisjoint(correct_word):
	        # Not a single shared character.
	        label = "Substitution/Distorted"
	    else:
	        # Some characters overlap but the word form differs.
	        label = "Phonetic/Matra error"
	    return label

	def compare_hindi_sentences(expected, transcribed):
	    """Align the transcription against the expected text, word by word.

	    Both inputs are split on whitespace and aligned with
	    ``difflib.SequenceMatcher``. Every non-matching position yields one tuple.

	    Args:
	        expected: The reference text the reader was supposed to say.
	        transcribed: The text produced by the recogniser.

	    Returns:
	        A list of ``(asr_word, correct_word, error_type)`` tuples. A word that
	        is missing from the transcription has ``asr_word == ""``; a word that
	        only appears in the transcription has ``correct_word == ""``.
	    """
	    from itertools import zip_longest

	    # ``split()`` with no argument already ignores leading/trailing whitespace;
	    # ``or ""`` makes a missing (None) input behave like empty text.
	    expected_words = (expected or "").split()
	    transcribed_words = (transcribed or "").split()

	    # autojunk=False: the default heuristic stops indexing words that are
	    # frequent in the second sequence once it has 200+ items, which would
	    # misalign long passages and report words that were read correctly.
	    matcher = difflib.SequenceMatcher(
	        None, transcribed_words, expected_words, autojunk=False
	    )

	    errors = []
	    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
	        heard = transcribed_words[i1:i2]
	        wanted = expected_words[j1:j2]
	        if opcode == "replace":
	            # Pair the words positionally; the shorter side is padded with ""
	            # so surplus words are classified as missing/extra.
	            for asr_word, correct_word in zip_longest(heard, wanted, fillvalue=""):
	                errors.append(
	                    (asr_word, correct_word, get_error_type(asr_word, correct_word))
	                )
	        elif opcode == "insert":
	            # Present only in the expected text: the reader skipped them.
	            errors.extend(("", word, "Missing word") for word in wanted)
	        elif opcode == "delete":
	            # Present only in the transcription: the reader added them.
	            errors.extend((word, "", "Extra word") for word in heard)
	        # "equal" spans contribute nothing.
	    return errors

	def transcribe_audio(audio_path, original_text):
	    """Transcribe a recording and list how it differs from the expected text.

	    The audio is mixed down to mono, resampled to 16 kHz, peak-normalised and
	    decoded greedily (argmax) with the module-level model and processor. The
	    transcription is then compared against ``original_text`` word by word.

	    Args:
	        audio_path: Path of the recorded or uploaded audio file.
	        original_text: The text the reader was asked to read aloud.

	    Returns:
	        A ``(result, table)`` pair. ``result`` is a dict holding the
	        transcription on success or an ``"error"`` message on failure.
	        ``table`` is a DataFrame with one row per detected error (transcribed
	        word, expected word, error type); it is empty on failure.
	    """
	    columns = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

	    # Nothing recorded or uploaded yet: say so instead of surfacing a loader error.
	    if not audio_path:
	        return {"error": "No audio provided."}, pd.DataFrame(columns=columns)

	    try:
	        waveform, sample_rate = torchaudio.load(audio_path)
	        # Convert to mono
	        if waveform.shape[0] > 1:
	            waveform = waveform.mean(dim=0, keepdim=True)
	        # Resample to 16000 Hz for model
	        if sample_rate != 16000:
	            transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
	            waveform = transform(waveform)
	        # Normalize to [-1, 1]; skip for all-zero (silent) audio, where the
	        # division would turn every sample into NaN.
	        peak = waveform.abs().max()
	        if peak > 0:
	            waveform = waveform / peak

	        input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values

	        with torch.no_grad():
	            logits = model(input_values).logits
	        predicted_ids = torch.argmax(logits, dim=-1)
	        transcription = processor.decode(predicted_ids[0])

	        # Word-level comparison; the tuple order matches the column order.
	        errors = compare_hindi_sentences(original_text or "", transcription)
	        df_errors = pd.DataFrame(errors, columns=columns)

	        return {
	            "📝 Transcribed Text": transcription,
	        }, df_errors
	    except Exception as e:
	        # UI boundary: report any failure in the results panel rather than crash.
	        return {"error": str(e)}, pd.DataFrame(columns=columns)

	# Column headers of the error table: transcribed word, expected word, error type.
	ERROR_TABLE_HEADERS = ["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"]

	# Build the Gradio interface: text input + listen button, audio input, results.
	with gr.Blocks() as app:
	    gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")

	    # NOTE(review): the Row is assumed to hold only the textbox and the
	    # button; the original nesting was lost — confirm against the live layout.
	    with gr.Row():
	        input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
	        play_button = gr.Button("🔊 Listen to Text")

	    # play_text returns a status message; route it to a visible field
	    # (it was previously wired with no outputs, so the message was dropped).
	    play_status = gr.Textbox(label="Status", interactive=False)
	    play_button.click(play_text, inputs=[input_text], outputs=[play_status])

	    gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
	    audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")

	    submit_button = gr.Button("✅ Submit Recording for Checking")
	    output = gr.JSON(label="Results")
	    error_table = gr.Dataframe(headers=ERROR_TABLE_HEADERS, label="गलती तालिका (Error Table)")

	    # transcribe_audio returns (result dict, error DataFrame), in that order.
	    submit_button.click(
	        transcribe_audio,
	        inputs=[audio_input, input_text],
	        outputs=[output, error_table],
	    )

	app.launch()