Mr7Explorer committed on
Commit
b72f14c
·
verified ·
1 Parent(s): 391be2d

Create spectral.py

Browse files
Files changed (1) hide show
  1. spectral.py +150 -0
spectral.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # spectral.py
2
+ # ============================================================
3
+ # Spectral Analysis Module for Audio Forensic Analyzer
4
+ # Logic preserved exactly from original app.py (cleaned + modular)
5
+ # ============================================================
6
+
7
+ import numpy as np
8
+ import librosa
9
+ import scipy.signal as sps
10
+
11
+
12
def compute_spectral_analysis(y, sr, n_fft=4096):
    """Run the speech-QC spectral measurements on one audio signal.

    A Hann-windowed STFT (hop of ``n_fft // 4``) is reduced to two per-bin
    envelopes, both taken at the 90th percentile across frames: one of
    power and one of dB magnitude (dB referenced to the spectrogram
    maximum). Roll-off points, band energy shares, brick-wall detection and
    notch detection are derived from those envelopes. Centroid, bandwidth,
    flatness and roll-off are frame averages of the corresponding
    ``librosa.feature`` descriptors.

    Args:
        y: Audio samples, handed unchanged to ``librosa.stft``.
        sr: Sampling rate; bin frequencies are spaced evenly over
            ``0 .. sr / 2``.
        n_fft: FFT size used for the STFT.

    Returns:
        dict with the following keys:

        * ``S_db``, ``freqs``, ``hop_length``, ``n_fft`` -- the dB
          spectrogram, the bin frequency axis and the STFT settings.
        * ``rolloff_85pct``, ``rolloff_95pct`` -- frequency at which the
          cumulative power envelope reaches 85 % / 95 % of its total.
        * ``highest_freq_minus60db`` -- frequency of the highest bin whose
          dB envelope exceeds (spectrogram peak - 60), or ``0.0`` if no
          bin does.
        * ``energy_distribution`` -- percentage of the power envelope in
          each fixed frequency band.
        * ``brick_wall_detected``, ``brick_wall_freq`` -- whether two
          adjacent bins of the dB envelope drop by more than 20, and the
          frequency of the first such drop (``None`` when absent).
        * ``spectral_notches`` -- list of ``{"freq", "depth_db"}`` dicts
          for local minima of the median-smoothed dB envelope that sit at
          least 15 below their neighbourhood and above 100 on the
          frequency axis.
        * ``spectral_centroid``, ``spectral_bandwidth``,
          ``spectral_flatness``, ``spectral_rolloff`` -- frame means.
        * ``hf_env`` -- the full dB envelope; ``lf_env`` -- its first 200
          bins (the same array when it has 200 bins or fewer).
    """
    hop_length = n_fft // 4

    # Magnitude spectrogram plus its dB form relative to the global maximum.
    magnitude = np.abs(
        librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window="hann")
    )
    freqs = np.linspace(0, sr / 2, magnitude.shape[0])
    S_db = librosa.amplitude_to_db(magnitude, ref=np.max)

    # Per-bin power envelope (90th percentile over frames). The tiny offset
    # avoids a zero total, and thus a division by zero, on silent input.
    bin_energy = np.percentile(magnitude ** 2, 90, axis=1) + 1e-20
    total_energy = float(np.sum(bin_energy))
    running_energy = np.cumsum(bin_energy)
    last_bin = len(freqs) - 1

    def rolloff_frequency(fraction):
        # First bin where the cumulative envelope reaches the requested
        # share of the total, clamped to the top of the frequency axis.
        idx = np.searchsorted(running_energy, fraction * total_energy)
        return float(freqs[min(idx, last_bin)])

    freq_at_85 = rolloff_frequency(0.85)
    freq_at_95 = rolloff_frequency(0.95)

    # Per-bin dB envelope, again the 90th percentile over frames.
    env_db = np.percentile(S_db, 90, axis=1)

    # Highest bin that is still within 60 of the spectrogram peak.
    peak_db = float(np.max(S_db))
    audible_bins = np.flatnonzero(env_db > peak_db - 60)
    if audible_bins.size:
        highest_freq = float(freqs[audible_bins[-1]])
    else:
        highest_freq = 0.0

    # Band table: (result key, lower edge, upper edge). An upper edge of
    # None means "up to the last bin".
    band_edges = (
        ("below_100hz", 0, 100),
        ("100_500hz", 100, 500),
        ("500_2khz", 500, 2000),
        ("2k_8khz", 2000, 8000),
        ("8k_12khz", 8000, 12000),
        ("12k_16khz", 12000, 16000),
        ("above_16khz", 16000, None),
    )

    def energy_share(low, high):
        # Percentage of the power envelope between the two band edges.
        start = np.searchsorted(freqs, low)
        stop = None if high is None else np.searchsorted(freqs, high)
        return float(100 * np.sum(bin_energy[start:stop]) / total_energy)

    energy_stats = {
        label: energy_share(low, high) for label, low, high in band_edges
    }

    # Brick wall: any bin-to-bin fall of more than 20 in the dB envelope.
    steep_drops = np.flatnonzero(np.diff(env_db) < -20)
    if steep_drops.size:
        brick_wall = True
        brick_freq = float(freqs[steep_drops[0]])
    else:
        brick_wall = False
        brick_freq = None

    # Notches: strict local minima of the median-smoothed envelope, judged
    # against the highest value within six bins on either side.
    smoothed = sps.medfilt(env_db, kernel_size=9)
    notches = []
    for m in sps.argrelextrema(smoothed, np.less)[0]:
        below = smoothed[max(0, m - 6):m]
        above = smoothed[m + 1:m + 7]  # slicing clips at the array end
        below_peak = below.max() if below.size else -999
        above_peak = above.max() if above.size else -999
        depth = max(below_peak, above_peak) - smoothed[m]

        if depth >= 15 and freqs[m] > 100:
            notches.append({
                "freq": float(freqs[m]),
                "depth_db": float(depth),
            })

    def frame_mean(feature):
        # Collapse a per-frame descriptor to one Python float.
        return float(np.mean(feature))

    centroid = frame_mean(librosa.feature.spectral_centroid(S=magnitude, sr=sr))
    bandwidth = frame_mean(librosa.feature.spectral_bandwidth(S=magnitude, sr=sr))
    flatness = frame_mean(librosa.feature.spectral_flatness(S=magnitude))
    rolloff = frame_mean(librosa.feature.spectral_rolloff(S=magnitude, sr=sr))

    # Low-frequency view of the envelope for downstream detectors: the
    # first 200 bins, or the envelope itself when it is not longer than that.
    lf_env = env_db
    if len(env_db) > 200:
        lf_env = env_db[:200]

    return {
        "S_db": S_db,
        "freqs": freqs,
        "hop_length": hop_length,
        "n_fft": n_fft,

        "rolloff_85pct": freq_at_85,
        "rolloff_95pct": freq_at_95,
        "highest_freq_minus60db": highest_freq,

        "energy_distribution": energy_stats,

        "brick_wall_detected": brick_wall,
        "brick_wall_freq": brick_freq,

        "spectral_notches": notches,

        "spectral_centroid": centroid,
        "spectral_bandwidth": bandwidth,
        "spectral_flatness": flatness,
        "spectral_rolloff": rolloff,

        "hf_env": env_db,
        "lf_env": lf_env,
    }