import numpy as np
import librosa

# Settings of the preprocessing chain. The function below is meant to mirror
# the training-time preprocessing, so these values should only be changed
# together with the training pipeline.
_NOISE_PERCENTILE = 10     # percentile of the STFT magnitude used as noise floor
_GATE_FACTOR = 1.5         # multiple of the noise floor subtracted from the magnitude
_PREEMPHASIS_COEF = 0.97   # coefficient of the first-order pre-emphasis filter
_EPS = 1e-8                # keeps the mask division finite where the magnitude is 0


def advanced_preprocess(audio_data, sr):
    """Denoise, pre-emphasise and peak-normalise an audio signal.

    Applies the same preprocessing used in the advanced training pipeline:

    1. Spectral gating noise reduction: the signal is transformed with
       ``librosa.stft`` (library default parameters), a noise floor is
       estimated as the 10th percentile of the magnitude along axis 1 of the
       STFT, 1.5x that floor is subtracted from the magnitude (clipped at
       zero), and the result is transformed back with ``librosa.istft`` to
       the original number of samples.
    2. Pre-emphasis filter: ``y[n] = x[n] - 0.97 * x[n - 1]``, with the first
       sample passed through unchanged.
    3. Normalization: division by the maximum absolute sample value.

    Args:
        audio_data: Audio samples. The pre-emphasis step treats axis 0 as the
            time axis, so this is presumably a 1-D (mono) floating-point
            NumPy array -- confirm against callers.
        sr: Sampling rate. Accepted for interface compatibility; it is not
            used by any of the processing steps.

    Returns:
        The processed signal with the same number of samples as
        ``audio_data``. Its peak absolute value is 1 unless the processed
        signal is all zeros, in which case it is returned without scaling.
        Empty input yields an empty array.
    """
    # An empty signal has no frames to transform and no first sample for the
    # pre-emphasis step, so hand back an (independent) empty array up front.
    if np.size(audio_data) == 0:
        return np.array(audio_data)

    # 1. Noise reduction (spectral gating)
    D = librosa.stft(audio_data)
    magnitude = np.abs(D)
    noise_floor = np.percentile(
        magnitude, _NOISE_PERCENTILE, axis=1, keepdims=True
    )
    # Soft mask in [0, 1): the fraction of each bin's magnitude that remains
    # after subtracting the scaled noise floor.
    mask = np.maximum(magnitude - _GATE_FACTOR * noise_floor, 0) / (magnitude + _EPS)
    # Scaling the complex spectrum by the real-valued mask changes only the
    # magnitude and leaves the phase untouched, so there is no need to split
    # the spectrum into magnitude/phase and rebuild it.
    D_cleaned = D * mask
    audio_clean = librosa.istft(D_cleaned, length=len(audio_data))

    # 2. Pre-emphasis: boost high frequencies with a first-order difference.
    audio_pre = np.append(
        audio_clean[0],
        audio_clean[1:] - _PREEMPHASIS_COEF * audio_clean[:-1],
    )

    # 3. Normalization: scale to unit peak; skip the division for an all-zero
    # signal to avoid dividing by zero.
    max_val = np.max(np.abs(audio_pre))
    if max_val > 0:
        return audio_pre / max_val
    return audio_pre