UpCoder committed on
Commit
fbe7334
·
verified ·
1 Parent(s): 62ede0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -20
app.py CHANGED
@@ -1,46 +1,44 @@
1
  import gradio as gr
 
2
  import torch
3
  import librosa
4
- import numpy as np
5
- from transformers import pipeline
6
 
7
# Load a lightweight pronunciation assessment model (based on Wav2Vec2/GOPT)
# This model is designed for CPU speed and phoneme-level accuracy
# NOTE(review): this runs at import time, so the model download/load cost is
# paid once per process, not once per request.
print("Loading Pronunciation Engine...")
# English ASR checkpoint used as the transcription backend for scoring.
evaluator = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
11
 
12
def assess_pronunciation(audio_filepath, target_text):
    """Score a learner's pronunciation of *target_text* from a recording.

    Parameters
    ----------
    audio_filepath : str or None
        Path to the learner's audio file (any format librosa can decode).
    target_text : str
        The sentence the learner was asked to read.

    Returns
    -------
    dict
        On success: ``accuracy_score``, ``fluency_score`` and
        ``completeness_score`` (ints, 0-100) plus ``student_said`` (the
        lower-cased transcription).  On failure: ``{"error": message}``.
    """
    if audio_filepath is None or not target_text:
        return {"error": "Missing audio or target text"}

    try:
        # 1. Transcribe the student's speech with the module-level pipeline.
        result = evaluator(audio_filepath)
        student_said = result["text"].lower()
        target_clean = target_text.lower().strip()

        # 2. Basic Scoring Logic (Goodness of Pronunciation)
        # In a production GOPT model, this compares acoustic features.
        # Here string similarity between target and transcription acts as a
        # cheap proxy for pronunciation accuracy.
        from difflib import SequenceMatcher
        accuracy = SequenceMatcher(None, target_clean, student_said).ratio() * 100

        # 3. Fluency is estimated from the speaking pace of the audio.
        audio, sr = librosa.load(audio_filepath)
        duration = librosa.get_duration(y=audio, sr=sr)
        words_count = len(student_said.split())
        # BUGFIX: a zero-length clip previously raised ZeroDivisionError,
        # which surfaced as an opaque {"error": ...} payload; floor the
        # denominator with a tiny epsilon instead.  Simple WPM heuristic.
        fluency = min(100, (words_count / max(duration, 1e-6)) * 20)

        return {
            "accuracy_score": round(accuracy),
            "fluency_score": round(fluency),
            # Near-perfect matches count as fully complete; otherwise nudge
            # the raw accuracy up slightly.
            "completeness_score": 100 if accuracy > 80 else round(accuracy + 5),
            "student_said": student_said,
        }
    except Exception as e:
        # API boundary: return a JSON-friendly error payload rather than
        # letting a stack trace escape to the Gradio caller.
        return {"error": str(e)}
42
 
43
- # Gradio 3 API Interface
44
  interface = gr.Interface(
45
  fn=assess_pronunciation,
46
  inputs=[gr.Audio(source="upload", type="filepath"), gr.Textbox(label="Target Text")],
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import torch
4
  import librosa
5
+ from difflib import SequenceMatcher
 
6
 
7
# Load a fast, accurate English speech model
# NOTE(review): instantiated at import time so the (potentially slow) model
# download/load happens once per process, not once per request.
print("Loading Pronunciation Engine...")
# wav2vec2-base-960h: English ASR checkpoint used as the scoring backend.
asr_pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
10
 
11
def assess_pronunciation(audio_filepath, target_text):
    """Score a learner's pronunciation of *target_text* from a recording.

    Parameters
    ----------
    audio_filepath : str or None
        Path to the learner's audio file (any format librosa can decode).
    target_text : str
        The sentence the learner was asked to read.

    Returns
    -------
    dict
        On success: ``accuracy_score``, ``fluency_score`` and
        ``completeness_score`` (ints, 0-100) plus ``transcription`` (the
        normalized ASR output).  On failure: ``{"error": message}``.
    """
    if not audio_filepath or not target_text:
        return {"error": "Missing input"}

    try:
        # 1. Transcribe the audio with the module-level ASR pipeline.
        result = asr_pipe(audio_filepath)
        student_said = result["text"].lower().strip()
        target_clean = target_text.lower().strip()

        # 2. Calculate Accuracy (String similarity)
        # This acts as a 'Goodness of Pronunciation' proxy.
        accuracy = SequenceMatcher(None, target_clean, student_said).ratio() * 100

        # 3. Calculate Fluency (Words per second)
        audio, sr = librosa.load(audio_filepath)
        duration = librosa.get_duration(y=audio, sr=sr)
        words_count = len(student_said.split())
        # Heuristic: x40 on words/sec saturates at 2.5 wps (~150 WPM),
        # roughly native conversational pace.
        # BUGFIX: the old floor of max(duration, 1) treated every clip
        # shorter than one second as a full second, deflating fluency for
        # short utterances; a tiny epsilon still guards divide-by-zero
        # without skewing the pace estimate.
        fluency = min(100, (words_count / max(duration, 1e-6)) * 40)

        return {
            "accuracy_score": round(accuracy),
            "fluency_score": round(fluency),
            # Near matches count as fully complete; otherwise nudge the raw
            # accuracy up slightly (75 + 10 keeps the result <= 100).
            "completeness_score": 100 if accuracy > 75 else round(accuracy + 10),
            "transcription": student_said,
        }
    except Exception as e:
        # API boundary: return a JSON-friendly error payload rather than
        # letting a stack trace escape to the Gradio caller.
        return {"error": str(e)}
40
 
41
+ # Interface set to Gradio 3 standards for simple API calls
42
  interface = gr.Interface(
43
  fn=assess_pronunciation,
44
  inputs=[gr.Audio(source="upload", type="filepath"), gr.Textbox(label="Target Text")],