UpCoder committed on
Commit
89b7da4
·
verified ·
1 Parent(s): 051e28c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -17
app.py CHANGED
@@ -2,41 +2,51 @@ import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
  import librosa
5
- from difflib import SequenceMatcher
6
 
7
# Load a fast, accurate English speech model once at import time so every
# request reuses the same pipeline instance.
print("Loading Pronunciation Engine...")
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
10
 
11
def assess_pronunciation(audio_filepath, target_text):
    """Score a recording of *target_text* for pronunciation quality.

    Parameters
    ----------
    audio_filepath : str
        Path to the user's recording (any format librosa can decode).
    target_text : str
        The sentence the user was asked to say.

    Returns
    -------
    dict
        ``accuracy_score``, ``fluency_score``, ``completeness_score`` and
        ``transcription`` on success, or a single ``error`` key on failure.
    """
    if not audio_filepath or not target_text:
        return {"error": "Missing input"}

    try:
        import torch.nn.functional as F

        # Resample to 16 kHz, the rate wav2vec2-base-960h was trained on.
        audio, sr = librosa.load(audio_filepath, sr=16000)

        # Average per-frame confidence of the acoustic model: the lower the
        # probability, the further the sounds are from what the native-speech
        # model expects.
        with torch.no_grad():
            logits = asr_pipe.model(torch.tensor(audio).unsqueeze(0)).logits
        probs = F.softmax(logits, dim=-1)
        strict_score = float(torch.mean(torch.max(probs, dim=-1).values)) * 100

        # BUG FIX: the original divided by len(audio) — the raw sample count
        # (~16000 per second) — which drove fluency to ~0.  Use seconds.
        duration_s = len(audio) / sr
        fluency = min(100, round(len(target_text) / max(duration_s, 1) * 10))

        # BUG FIX: clamp so the 'difficulty' offset can't produce a
        # negative accuracy score.
        accuracy = max(0, round(strict_score - 10))

        return {
            "accuracy_score": accuracy,
            "fluency_score": fluency,
            "completeness_score": 100,
            "transcription": asr_pipe(audio_filepath)["text"].lower(),
        }
    except Exception as e:
        # Surface any failure to the UI instead of crashing the app.
        return {"error": str(e)}
39
-
40
  # Gradio 3 Interface
41
  interface = gr.Interface(
42
  fn=assess_pronunciation,
 
2
  from transformers import pipeline
3
  import torch
4
  import librosa
5
+ import torch.nn.functional as F
6
 
7
# Build the ASR engine once at startup; all requests share this pipeline.
print("Loading Strict Pronunciation Engine...")
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
10
 
11
def assess_pronunciation(audio_filepath, target_text):
    """Score a recording of *target_text* for pronunciation quality.

    Parameters
    ----------
    audio_filepath : str
        Path to the user's recording (any format librosa can decode).
    target_text : str
        The sentence the user was asked to say.

    Returns
    -------
    dict
        ``accuracy_score``, ``fluency_score``, ``completeness_score`` and
        ``transcription`` on success, or a single ``error`` key on failure.
    """
    if not audio_filepath or not target_text:
        return {"error": "Missing input"}

    try:
        # 1. Load audio at 16 kHz, the rate wav2vec2-base-960h expects.
        audio, sr = librosa.load(audio_filepath, sr=16000)

        # 2. Strict scoring: average per-frame confidence of the acoustic
        # model over the clip — mumbling or heavy accent lowers it.
        with torch.no_grad():
            logits = asr_pipe.model(torch.tensor(audio).unsqueeze(0)).logits
        probs = F.softmax(logits, dim=-1)
        confidence = float(torch.mean(torch.max(probs, dim=-1).values))

        # 3. Transcription for user-visible feedback.
        transcription_result = asr_pipe(audio_filepath)
        said = transcription_result["text"].lower()

        # Strict logic: squaring the 0-1 confidence applies a difficulty
        # curve before scaling to 0-100.
        accuracy = round((confidence ** 2) * 100)

        # Fluency: characters per second, capped at 100.
        duration = len(audio) / sr
        fluency = min(100, round((len(said) / max(duration, 1)) * 10))

        # BUG FIX: completeness previously ignored target_text entirely
        # (a constant 100/80 keyed off accuracy).  Measure it instead as
        # the fraction of target words that appear in the transcription.
        target_words = target_text.lower().split()
        said_words = set(said.split())
        completeness = round(
            100 * sum(w in said_words for w in target_words)
            / max(len(target_words), 1)  # guard whitespace-only target
        )

        return {
            "accuracy_score": accuracy,
            "fluency_score": fluency,
            "completeness_score": completeness,
            "transcription": said,
        }
    except Exception as e:
        # Surface any failure to the UI instead of crashing the app.
        return {"error": str(e)}
49
+
50
  # Gradio 3 Interface
51
  interface = gr.Interface(
52
  fn=assess_pronunciation,