import numpy as np
import librosa
import scipy.signal as sps


def compute_spectral_analysis(y, sr, n_fft=4096):
    """Compute a set of spectral QC descriptors for an audio signal.

    The signal is transformed with a Hann-windowed STFT (hop of
    ``n_fft // 4``) and summarised per frequency bin with 90th-percentile
    statistics taken over time.

    Args:
        y: Audio samples. Axis 0 of the STFT is treated as frequency
            throughout, so a 1-D (mono) signal is expected.
        sr: Sampling rate; bin frequencies span ``0 .. sr / 2``.
        n_fft: FFT size used for the STFT.

    Returns:
        dict with the following keys:

        * ``S_db``: magnitude spectrogram in dB relative to its maximum.
        * ``freqs``: centre frequency of every STFT bin.
        * ``hop_length`` / ``n_fft``: STFT parameters that were used.
        * ``rolloff_85pct`` / ``rolloff_95pct``: frequency below which 85 % /
          95 % of the 90th-percentile power envelope is accumulated.
        * ``highest_freq_minus60db``: highest bin whose dB envelope is within
          60 dB of the spectrogram peak (0.0 if there is none).
        * ``energy_distribution``: percentage of envelope energy per band.
        * ``brick_wall_detected`` / ``brick_wall_freq``: whether two adjacent
          bins of the dB envelope drop by more than 20 dB, and the frequency
          of the bin just before the first such drop (``None`` if absent).
        * ``spectral_notches``: list of ``{"freq", "depth_db"}`` for local
          minima of the median-smoothed envelope that lie above 100 Hz and
          are at least 15 dB below the highest neighbour within 6 bins.
        * ``spectral_centroid``, ``spectral_bandwidth``,
          ``spectral_flatness``, ``spectral_rolloff``: time-averaged librosa
          descriptors.
        * ``hf_env``: the full 90th-percentile dB envelope.
        * ``lf_env``: the first 200 bins of that envelope (or all of it when
          it is shorter).
    """
    hop_length = n_fft // 4

    # ------------------------------------------------------------------
    # STFT -> magnitude -> dB
    # ------------------------------------------------------------------
    S = np.abs(
        librosa.stft(y, n_fft=n_fft, hop_length=hop_length, window="hann")
    )
    freqs = np.linspace(0, sr / 2, S.shape[0])
    S_db = librosa.amplitude_to_db(S, ref=np.max)

    # ------------------------------------------------------------------
    # 90th-percentile power envelope and cumulative roll-off points
    # ------------------------------------------------------------------
    # The tiny offset keeps total_energy strictly positive so the
    # percentage computations below never divide by zero.
    energy = np.percentile(S ** 2, 90, axis=1) + 1e-20
    total_energy = float(np.sum(energy))
    cum_energy = np.cumsum(energy)
    last_bin = len(freqs) - 1

    roll85_idx = np.searchsorted(cum_energy, 0.85 * total_energy)
    roll95_idx = np.searchsorted(cum_energy, 0.95 * total_energy)
    freq_at_85 = float(freqs[min(roll85_idx, last_bin)])
    freq_at_95 = float(freqs[min(roll95_idx, last_bin)])

    # ------------------------------------------------------------------
    # 90th-percentile dB envelope and highest bin within 60 dB of the peak
    # ------------------------------------------------------------------
    env_db = np.percentile(S_db, 90, axis=1)
    peak_db = float(np.max(S_db))
    threshold_db = peak_db - 60
    non_silent_bins = np.where(env_db > threshold_db)[0]
    highest_freq = (
        float(freqs[non_silent_bins[-1]]) if non_silent_bins.size else 0.0
    )

    # ------------------------------------------------------------------
    # Speech-centric band energy distribution (percent of envelope energy)
    # ------------------------------------------------------------------
    def band_energy(low, high):
        """Percentage of envelope energy in bins with low <= f < high."""
        i1 = np.searchsorted(freqs, low)
        i2 = np.searchsorted(freqs, high)
        return float(100 * np.sum(energy[i1:i2]) / total_energy)

    def band_energy_above(f):
        """Percentage of envelope energy in bins at or above ``f``."""
        idx = np.searchsorted(freqs, f)
        return float(100 * np.sum(energy[idx:]) / total_energy)

    energy_stats = {
        "below_100hz": band_energy(0, 100),
        "100_500hz": band_energy(100, 500),
        "500_2khz": band_energy(500, 2000),
        "2k_8khz": band_energy(2000, 8000),
        "8k_12khz": band_energy(8000, 12000),
        "12k_16khz": band_energy(12000, 16000),
        "above_16khz": band_energy_above(16000),
    }

    # ------------------------------------------------------------------
    # Brick-wall detection: >20 dB drop between two adjacent bins
    # ------------------------------------------------------------------
    diffs = np.diff(env_db)
    big_drop_idx = np.where(diffs < -20)[0]
    brick_wall = bool(big_drop_idx.size)
    brick_freq = float(freqs[big_drop_idx[0]]) if big_drop_idx.size else None

    # ------------------------------------------------------------------
    # Spectral notch detection on the median-smoothed envelope
    # ------------------------------------------------------------------
    smooth = sps.medfilt(env_db, kernel_size=9)
    # A median filter flattens valley bottoms into runs of equal values.
    # A strict "less than both neighbours" test never fires on such a
    # plateau, so minima are located as peaks of the negated envelope:
    # find_peaks reports strict minima as before and additionally returns
    # the centre sample of flat-bottomed minima.
    minima, _ = sps.find_peaks(-smooth)

    notches = []
    for m in minima:
        left = smooth[max(0, m - 6):m]
        right = smooth[m + 1:min(len(smooth), m + 7)]
        neighbor_peak = max(
            left.max() if left.size else -999,
            right.max() if right.size else -999,
        )
        depth = neighbor_peak - smooth[m]
        if depth >= 15 and freqs[m] > 100:
            notches.append({
                "freq": float(freqs[m]),
                "depth_db": float(depth),
            })

    # ------------------------------------------------------------------
    # Additional time-averaged spectral descriptors
    # ------------------------------------------------------------------
    centroid = float(np.mean(librosa.feature.spectral_centroid(S=S, sr=sr)))
    bandwidth = float(np.mean(librosa.feature.spectral_bandwidth(S=S, sr=sr)))
    flatness = float(np.mean(librosa.feature.spectral_flatness(S=S)))
    rolloff = float(np.mean(librosa.feature.spectral_rolloff(S=S, sr=sr)))

    return {
        "S_db": S_db,
        "freqs": freqs,
        "hop_length": hop_length,
        "n_fft": n_fft,
        "rolloff_85pct": freq_at_85,
        "rolloff_95pct": freq_at_95,
        "highest_freq_minus60db": highest_freq,
        "energy_distribution": energy_stats,
        "brick_wall_detected": brick_wall,
        "brick_wall_freq": brick_freq,
        "spectral_notches": notches,
        "spectral_centroid": centroid,
        "spectral_bandwidth": bandwidth,
        "spectral_flatness": flatness,
        "spectral_rolloff": rolloff,
        "hf_env": env_db,
        # Slicing already yields the whole array when it has <= 200 bins.
        "lf_env": env_db[:200],
    }