# Hugging Face upload-page residue (commented out so the file parses as Python):
# alaatiger989's picture
# Add files using upload-large-folder tool
# b5e57ee verified
import os
import torch
from nemo.collections.asr.models import EncDecHybridRNNTCTCBPEModel
from nemo.collections.asr.metrics.wer import word_error_rate
# ==========================
# CONFIGURATION
# ==========================
# Path to the fine-tuned NeMo checkpoint restored at module load below.
MODEL_PATH = "output_finetuned/finetuned_model_best.nemo"
# Audio clip used for the single-file smoke test.
SAMPLE_AUDIO = "arabic_recording.wav"
# Reference transcript (Arabic spoken digits) that WER is computed against.
EXPECTED_TEXT = "زيرو واحد واحد واحد واحد واحد واحد اتنين اربعة ستة"
# ==========================
# LOAD MODEL
# ==========================
# Restore the fine-tuned hybrid RNNT/CTC model on GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading model on: {device}")
try:
    model = EncDecHybridRNNTCTCBPEModel.restore_from(restore_path=MODEL_PATH, map_location=device)
    model.eval()  # inference mode: disable dropout/batch-norm updates
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"❌ Failed to load model: {e}")
    # Bug fix: exit() is the interactive-only `site` helper and exits with
    # status 0; raise SystemExit(1) so the shell/CI sees the failure.
    raise SystemExit(1)
# ==========================
# TEST FUNCTION
# ==========================
def test_model(model, sample_audio, expected_text):
if not os.path.exists(sample_audio):
print(f"❌ Audio file not found: {sample_audio}")
return
print(f"\n🔍 Testing on: {sample_audio}")
# Transcribe
with torch.no_grad():
output = model.transcribe([sample_audio])
# Handle different return types
if isinstance(output, tuple):
# Sometimes returns (predictions, tokens)
prediction_list = output[0]
else:
prediction_list = output
# Ensure it's a single string
prediction = prediction_list[0] if isinstance(prediction_list, list) else prediction_list
# Display results
print(f"\nPredicted: {prediction}")
print(f"Expected : {expected_text}")
# Compute WER
wer = word_error_rate([expected_text], [prediction])
print(f"\n📊 Word Error Rate (WER): {wer:.3f}")
return prediction, wer
# ==========================
# RUN TEST
# ==========================
if __name__ == "__main__":
    # Guard the unpack: test_model returns None/(None, None) when the audio
    # file is missing, which previously crashed the two-value unpacking here.
    result = test_model(model, SAMPLE_AUDIO, EXPECTED_TEXT)
    if result is not None:
        prediction, wer = result