Spaces:

danarcat
/

PronunciationChecker

Sleeping

karlhajal committed on Jan 22, 2025

Commit

6ea1d37

verified ·

1 Parent(s): dfe6023

Update red vs green logic in quality score computation

Files changed (1) hide show

src/audio_preprocessing.py CHANGED Viewed

@@ -102,22 +102,21 @@ def process_wav(wav_path, target_sr, do_trim_silences=True):
     return audio
-import numpy as np
 def assess_pronunciation_quality(dist_matrix, path):
     # Extract distances along the alignment path
     path_distances = [dist_matrix[i, j] for i, j in zip(*path)]
-    # Calculate global min and max for normalization
-    global_min = dist_matrix.min()
-    global_max = dist_matrix.max()
-    # Normalize distances
-    normalized_distances = [(d - global_min) / (global_max - global_min) for d in path_distances]
     # Analyze normalized distances
-    num_red_segments = sum(1 for d in normalized_distances if d >= 0.5)
-    total_segments = len(normalized_distances)
     red_percentage = num_red_segments / total_segments if total_segments > 0 else 0.0
     # Calculate quality score and repetition need

     return audio
 def assess_pronunciation_quality(dist_matrix, path):
     # Extract distances along the alignment path
     path_distances = [dist_matrix[i, j] for i, j in zip(*path)]
+    num_wav_frames = len(dist_matrix) # For the reference wav
+    wav_distances = [0] * num_wav_frames
+    for (i, j) in zip(*path):
+        wav_distances[i] = dist_matrix[i, j] # For the reference wav
+    threshold = 0.3
     # Analyze normalized distances
+    num_red_segments = sum(1 for d in wav_distances if d >= threshold)
+    total_segments = len(wav_distances)
     red_percentage = num_red_segments / total_segments if total_segments > 0 else 0.0
     # Calculate quality score and repetition need