Spaces:

NLPV
/

ReadabilityTest

Sleeping

App Files Files Community

NLPV committed on Jul 14, 2025

Commit

650c3e9

verified ·

1 Parent(s): f0b2a66

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -26

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from gtts import gTTS
 import tempfile
 import os
 import torch
-import re
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torchaudio
 import difflib
@@ -19,32 +18,26 @@ def play_text(text):
     tts = gTTS(text=text, lang='hi', slow=False)
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
     tts.save(temp_file.name)
-    os.system(f"start {temp_file.name}")  # Windows only
-    return "✅ Text is being read out. Please listen and read it yourself."
 def get_error_type(asr_word, correct_word):
-    # Both words missing or extra
     if not asr_word:
         return "Missing word"
     if not correct_word:
         return "Extra word"
-    # Spelling error: small Levenshtein
     if lev_distance(asr_word, correct_word) <= 2:
         return "Spelling mistake"
-    # Matra/phonetic error: shared chars but wrong form
     set1, set2 = set(asr_word), set(correct_word)
     if set1 & set2:
         return "Phonetic/Matra error"
     return "Substitution/Distorted"
 def compare_hindi_sentences(expected, transcribed):
-    # Split by whitespace for Hindi
     expected_words = expected.strip().split()
     transcribed_words = transcribed.strip().split()
     matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
     errors = []
     for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
         if opcode == "equal":
             continue
@@ -65,48 +58,45 @@ def compare_hindi_sentences(expected, transcribed):
 def transcribe_audio(audio_path, original_text):
     try:
         waveform, sample_rate = torchaudio.load(audio_path)
-        # Convert to mono
         if waveform.shape[0] > 1:
             waveform = waveform.mean(dim=0, keepdim=True)
-        # Resample to 16000 Hz for model
         if sample_rate != 16000:
             transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
             waveform = transform(waveform)
-        # Normalize to [-1, 1]
         waveform = waveform / waveform.abs().max()
         input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         transcription = processor.decode(predicted_ids[0])
-        # ... rest of your error analysis
-        return {
             "📝 Transcribed Text": transcription,
-            # etc.
-        }, df_errors
     except Exception as e:
         return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
 with gr.Blocks() as app:
     gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")
     with gr.Row():
         input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
         play_button = gr.Button("🔊 Listen to Text")
-    play_button.click(play_text, inputs=[input_text], outputs=[])
     gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
     audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
     submit_button = gr.Button("✅ Submit Recording for Checking")
     output = gr.JSON(label="Results")
-    error_table = gr.Dataframe(headers=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"], label="गलती तालिका (Error Table)")
     submit_button.click(
         transcribe_audio,
         inputs=[audio_input, input_text],

 import tempfile
 import os
 import torch
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import torchaudio
 import difflib
     tts = gTTS(text=text, lang='hi', slow=False)
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
     tts.save(temp_file.name)
+    # Return file for Gradio audio output
+    return temp_file.name
 def get_error_type(asr_word, correct_word):
     """Label the kind of mismatch between a transcribed word and the expected word.

     Returns one of the strings "Missing word", "Extra word",
     "Spelling mistake", "Phonetic/Matra error" or "Substitution/Distorted".
     """
     # The transcribed word is tested first, so it decides the label when
     # both arguments are empty.
     if not asr_word:
         return "Missing word"
     if not correct_word:
         return "Extra word"
     # A small edit distance (at most 2) is reported as a spelling mistake.
     edit_distance = lev_distance(asr_word, correct_word)
     if edit_distance <= 2:
         return "Spelling mistake"
     # Otherwise, any character in common marks a phonetic/matra error.
     shared_chars = set(asr_word).intersection(correct_word)
     return "Phonetic/Matra error" if shared_chars else "Substitution/Distorted"
 def compare_hindi_sentences(expected, transcribed):
     expected_words = expected.strip().split()
     transcribed_words = transcribed.strip().split()
     matcher = difflib.SequenceMatcher(None, transcribed_words, expected_words)
     errors = []
     for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
         if opcode == "equal":
             continue
 def transcribe_audio(audio_path, original_text):
     try:
         waveform, sample_rate = torchaudio.load(audio_path)
         if waveform.shape[0] > 1:
             waveform = waveform.mean(dim=0, keepdim=True)
         if sample_rate != 16000:
             transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
             waveform = transform(waveform)
         waveform = waveform / waveform.abs().max()
         input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
         with torch.no_grad():
             logits = model(input_values).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         transcription = processor.decode(predicted_ids[0])
+        # Error analysis
+        errors = compare_hindi_sentences(original_text, transcription)
+        df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
+        # Speaking speed
+        transcribed_words = transcription.strip().split()
+        duration = waveform.shape[1] / 16000
+        speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
+        result = {
             "📝 Transcribed Text": transcription,
+            "⏱️ Speaking Speed (words/sec)": speed,
+        }
+        return result, df_errors
     except Exception as e:
         return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
 with gr.Blocks() as app:
     gr.Markdown("## 🗣️ Hindi Reading & Pronunciation Practice App (AI4Bharat Model)")
     with gr.Row():
         input_text = gr.Textbox(label="Paste Hindi Text Here", placeholder="यहाँ हिंदी टेक्स्ट लिखें...")
         play_button = gr.Button("🔊 Listen to Text")
+        audio_output = gr.Audio(label="Text-to-Speech Output", type="filepath")
+    play_button.click(play_text, inputs=input_text, outputs=audio_output)
     gr.Markdown("### 🎤 Now upload or record yourself reading the text aloud below:")
     audio_input = gr.Audio(type="filepath", label="Upload or Record Your Voice")
     submit_button = gr.Button("✅ Submit Recording for Checking")
     output = gr.JSON(label="Results")
+    error_table = gr.Dataframe(label="गलती तालिका (Error Table)")
     submit_button.click(
         transcribe_audio,
         inputs=[audio_input, input_text],