AshishNoel14 committed on
Commit
41d7bd9
·
verified ·
1 Parent(s): eb02f99

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. metrics/samplerate_metric.py +24 -15
metrics/samplerate_metric.py CHANGED
@@ -25,26 +25,35 @@ def get_mic_sr(audio_path):
25
  # Compute magnitude spectrum
26
  S_full, phase = librosa.magphase(librosa.stft(y))
27
 
28
- # Calculate power spectrum
29
- power = S_full**2
 
 
30
 
31
- # Average power across time
32
- avg_power = np.mean(power, axis=1)
33
-
34
- # Cumulative distribution of power
35
- cumulative_power = np.cumsum(avg_power)
36
- total_power = cumulative_power[-1]
37
-
38
- if total_power == 0:
39
  return 0
40
 
41
- # Find frequency where 99% of power is contained
42
- threshold = 0.99 * total_power
43
- idx = np.searchsorted(cumulative_power, threshold)
 
 
 
 
44
 
45
- # Convert bin index to frequency
46
  fft_freqs = librosa.fft_frequencies(sr=sr)
47
- cutoff_freq = fft_freqs[idx]
 
 
 
 
 
 
 
 
 
48
 
49
  # Effective SR is 2 * Cutoff
50
  effective_sr = int(cutoff_freq * 2)
 
25
  # Compute magnitude spectrum
26
  S_full, phase = librosa.magphase(librosa.stft(y))
27
 
28
+ # Max Hold Spectrum (Peak detection across time)
29
+ # Instead of average, we take the MAX magnitude at each frequency bin across all time frames.
30
+ # This aligns with "Max peaks on the spectrogram".
31
+ S_max = np.max(S_full, axis=1)
32
 
33
+ # Normalize to Max Peak (0 Reference)
34
+ S_ref = np.max(S_max)
35
+ if S_ref == 0:
 
 
 
 
 
36
  return 0
37
 
38
+ # Convert to dB relative to peak
39
+ S_db = librosa.amplitude_to_db(S_max, ref=S_ref)
40
+
41
+ # Threshold: Find the highest frequency that is within X dB of the peak.
42
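+ # Typical noise floor might be -80dB or -90dB. NOTE: librosa.amplitude_to_db clips at top_db=80 dB below the peak by default, so a threshold below -80 dB only works if top_db is raised (or set to None).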
43
+ # We look for the "last" bin that is ABOVE the threshold.
44
+ threshold_db = -80.0
45
 
 
46
  fft_freqs = librosa.fft_frequencies(sr=sr)
47
+
48
+ # Find indices where signal > threshold
49
+ valid_indices = np.where(S_db > threshold_db)[0]
50
+
51
+ if len(valid_indices) == 0:
52
+ return 0
53
+
54
+ # The highest index with significant energy
55
+ last_idx = valid_indices[-1]
56
+ cutoff_freq = fft_freqs[last_idx]
57
 
58
  # Effective SR is 2 * Cutoff
59
  effective_sr = int(cutoff_freq * 2)