Spaces:

NLPV
/

ReadingTestHindi

Sleeping

App Files Community

NLPV committed on Jul 17, 2025

Commit

0b42505

verified ·

1 Parent(s): 60aad94

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -16

app.py CHANGED Viewed

@@ -1,13 +1,18 @@
 import gradio as gr
 from gtts import gTTS
 import tempfile
 import difflib
 import pandas as pd
 from Levenshtein import distance as lev_distance
-from indic_asr import load_model, ModelType
-# Load IndicWav2Vec model for Hindi
-model = load_model(lang="hi", model_type=ModelType.WAV2VEC2)
 def play_text(text):
     tts = gTTS(text=text, lang='hi', slow=False)
@@ -63,28 +68,33 @@ def calculate_accuracy(expected, transcribed):
 def transcribe_audio(audio_path, original_text):
     try:
-        # Use IndicWav2Vec for transcription
-        transcription = model.transcribe(audio_path).strip()
         errors = compare_hindi_sentences(original_text, transcription)
         df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
-        # Speaking speed: estimate from audio file length
-        import wave
-        with wave.open(audio_path, 'r') as wav_file:
-            frames = wav_file.getnframes()
-            rate = wav_file.getframerate()
-            duration = frames / float(rate)
         transcribed_words = transcription.strip().split()
         speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
         # Accuracy
         accuracy = calculate_accuracy(original_text, transcription)
-        result_dict = {
             "📝 Transcribed Text": transcription,
             "⏱️ Speaking Speed (words/sec)": speed,
-            "✅ Reading Accuracy (%)": accuracy,
         }
-        return result_dict, df_errors
     except Exception as e:
         return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])

 import gradio as gr
 from gtts import gTTS
 import tempfile
+import os
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torchaudio
 import difflib
 import pandas as pd
 from Levenshtein import distance as lev_distance
+# Load AI4Bharat Hindi model & processor (public model on Hugging Face)
+MODEL_NAME = "ai4bharat/indicwav2vec-hindi"
+processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
+model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)
 def play_text(text):
     tts = gTTS(text=text, lang='hi', slow=False)
 def transcribe_audio(audio_path, original_text):
     try:
+        waveform, sample_rate = torchaudio.load(audio_path)
+        if waveform.shape[0] > 1:
+            waveform = waveform.mean(dim=0, keepdim=True)
+        if sample_rate != 16000:
+            transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
+            waveform = transform(waveform)
+        waveform = waveform / waveform.abs().max()
+        input_values = processor(waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt").input_values
+        with torch.no_grad():
+            logits = model(input_values).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.decode(predicted_ids[0]).strip()
+        # Error analysis
         errors = compare_hindi_sentences(original_text, transcription)
         df_errors = pd.DataFrame(errors, columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])
+        # Speaking speed
         transcribed_words = transcription.strip().split()
+        duration = waveform.shape[1] / 16000
         speed = round(len(transcribed_words) / duration, 2) if duration > 0 else 0
         # Accuracy
         accuracy = calculate_accuracy(original_text, transcription)
+        result = {
             "📝 Transcribed Text": transcription,
             "⏱️ Speaking Speed (words/sec)": speed,
+            "✅ Reading Accuracy (%)": accuracy
         }
+        return result, df_errors
     except Exception as e:
         return {"error": str(e)}, pd.DataFrame(columns=["बिगड़ा हुआ शब्द", "संभावित सही शब्द", "गलती का प्रकार"])