karlhajal committed on
Commit
6ea1d37
·
verified ·
1 Parent(s): dfe6023

Update red vs green logic in quality score computation

Browse files
Files changed (1) hide show
  1. src/audio_preprocessing.py +8 -9
src/audio_preprocessing.py CHANGED
@@ -102,22 +102,21 @@ def process_wav(wav_path, target_sr, do_trim_silences=True):
102
 
103
  return audio
104
 
105
- import numpy as np
106
 
107
  def assess_pronunciation_quality(dist_matrix, path):
108
  # Extract distances along the alignment path
109
  path_distances = [dist_matrix[i, j] for i, j in zip(*path)]
110
 
111
- # Calculate global min and max for normalization
112
- global_min = dist_matrix.min()
113
- global_max = dist_matrix.max()
114
-
115
- # Normalize distances
116
- normalized_distances = [(d - global_min) / (global_max - global_min) for d in path_distances]
117
 
118
  # Analyze normalized distances
119
- num_red_segments = sum(1 for d in normalized_distances if d >= 0.5)
120
- total_segments = len(normalized_distances)
121
  red_percentage = num_red_segments / total_segments if total_segments > 0 else 0.0
122
 
123
  # Calculate quality score and repetition need
 
102
 
103
  return audio
104
 
 
105
 
106
  def assess_pronunciation_quality(dist_matrix, path):
107
  # Extract distances along the alignment path
108
  path_distances = [dist_matrix[i, j] for i, j in zip(*path)]
109
 
110
+ num_wav_frames = len(dist_matrix) # For the reference wav
111
+ wav_distances = [0] * num_wav_frames
112
+ for (i, j) in zip(*path):
113
+ wav_distances[i] = dist_matrix[i, j] # For the reference wav
114
+
115
+ threshold = 0.3
116
 
117
  # Count reference frames whose path distance meets the red threshold
118
+ num_red_segments = sum(1 for d in wav_distances if d >= threshold)
119
+ total_segments = len(wav_distances)
120
  red_percentage = num_red_segments / total_segments if total_segments > 0 else 0.0
121
 
122
  # Calculate quality score and repetition need