# File size: 2,008 Bytes
import re
from collections import Counter

import gradio as gr
import librosa
import torch
import torch.nn.functional as F
from transformers import pipeline
# Load the speech-recognition engine once at import time; the scoring function
# below reuses this single pipeline object for every request.
ASR_MODEL_ID = "facebook/wav2vec2-base-960h"

print("Loading Strict Pronunciation Engine...")
asr_pipe = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
def _normalize_words(text):
    """Return the lowercase words of *text*, ignoring punctuation.

    Apostrophes inside a word are kept (e.g. "don't"); leading and trailing
    apostrophes are stripped.
    """
    candidates = (match.strip("'") for match in re.findall(r"[a-z']+", text.lower()))
    return [word for word in candidates if word]


def _completeness_score(target_text, said):
    """Return the percentage (0-100) of target words found in the transcription.

    Words are matched as a multiset, so a word repeated in the target must be
    spoken as many times to count fully. Returns 0 when the target contains no
    scorable words.
    """
    expected = _normalize_words(target_text)
    if not expected:
        return 0
    matched = Counter(expected) & Counter(_normalize_words(said))
    return round(100 * sum(matched.values()) / len(expected))


def assess_pronunciation(audio_filepath, target_text):
    """Score a recording of someone reading *target_text* aloud.

    Args:
        audio_filepath: Path to the audio file to assess.
        target_text: The sentence the speaker was asked to read.

    Returns:
        On success, a dict with:
          - "accuracy_score": 0-100, squared mean per-frame model confidence.
          - "fluency_score": 0-100, transcribed characters per second times 10.
          - "completeness_score": 0-100, share of target words that appear in
            the transcription.
          - "transcription": the lowercased recognized text.
        On failure, ``{"error": <message>}``; no exception is raised.
    """
    if not audio_filepath or not target_text or not target_text.strip():
        return {"error": "Missing input"}

    try:
        # 1. Decode the audio once, at the 16 kHz rate the model is fed.
        audio, sr = librosa.load(audio_filepath, sr=16000)
        if len(audio) == 0:
            return {"error": "Audio contains no samples"}

        # 2. Strict scoring (confidence analysis).
        # Run the samples through the pipeline's own feature extractor so the
        # model sees the same preprocessing the transcription path applies,
        # instead of raw, unprocessed samples.
        features = asr_pipe.feature_extractor(
            audio, sampling_rate=sr, return_tensors="pt"
        )
        input_values = features.input_values.to(asr_pipe.model.device)
        with torch.no_grad():
            logits = asr_pipe.model(input_values).logits
        probs = F.softmax(logits, dim=-1)
        # Average, over the whole clip, of the top class probability per frame.
        confidence = float(torch.mean(torch.max(probs, dim=-1).values))

        # 3. Transcription for feedback. Reuse the decoded samples rather than
        # having the pipeline read and decode the file a second time.
        transcription_result = asr_pipe({"raw": audio, "sampling_rate": sr})
        said = transcription_result["text"].lower()

        # Squaring the 0-1 confidence gives a steeper difficulty curve before
        # scaling to 0-100.
        accuracy = round((confidence ** 2) * 100)

        # Fluency: characters per second, with clips shorter than one second
        # treated as one second so very short audio cannot inflate the rate.
        duration = len(audio) / sr
        fluency = min(100, round((len(said) / max(duration, 1)) * 10))

        return {
            "accuracy_score": accuracy,
            "fluency_score": fluency,
            "completeness_score": _completeness_score(target_text, said),
            "transcription": said,
        }
    except Exception as e:
        # UI boundary: the result is rendered as JSON, so report the failure
        # in the payload rather than letting the exception propagate.
        return {"error": str(e)}
# Gradio 3 interface: an uploaded audio file (passed to the handler as a file
# path) plus the target sentence go in; the score dict comes back as JSON.
audio_input = gr.Audio(source="upload", type="filepath")
target_text_input = gr.Textbox(label="Target Text")
result_output = gr.JSON()

interface = gr.Interface(
    fn=assess_pronunciation,
    inputs=[audio_input, target_text_input],
    outputs=result_output,
)
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script,
    # not when it is imported.
    interface.launch()