Spaces:
Running
Running
| import numpy as np | |
| import librosa | |
def advanced_preprocess(
    audio_data,
    sr,
    *,
    noise_percentile=10,
    noise_scale=1.5,
    pre_emphasis=0.97,
):
    """Denoise, pre-emphasize and peak-normalize an audio signal.

    Applies the same preprocessing used in the advanced training pipeline:

    1. Spectral-gating noise reduction (soft spectral subtraction).
    2. Pre-emphasis filter.
    3. Peak normalization.

    With the default keyword arguments the processing steps and constants
    are the ones the pipeline has always used.

    Args:
        audio_data: Audio samples. The code indexes the signal along its
            first axis and uses ``len(audio_data)`` as the output length,
            so a 1-D (mono) array is expected.
        sr: Sample rate. Accepted for interface compatibility; none of the
            steps below read it.
        noise_percentile: Percentile of each frequency bin's magnitude
            (taken along axis 1 of the STFT) used as the noise-floor
            estimate.
        noise_scale: Multiple of the noise floor subtracted from the
            magnitude spectrum.
        pre_emphasis: Coefficient ``a`` of the filter
            ``y[n] = x[n] - a * x[n - 1]``.

    Returns:
        The processed signal, scaled so its largest absolute sample is 1.
        A signal that is entirely zero after filtering is returned
        unscaled, and an empty input is returned as an empty array.
    """
    audio = np.asarray(audio_data)
    if audio.size == 0:
        # Nothing to transform; the steps below need at least one sample
        # (the pre-emphasis step reads sample 0).
        return audio

    # 1. Noise reduction: estimate a per-bin noise floor, subtract a scaled
    # copy of it from the magnitude and clip at zero. The mask is the ratio
    # of the reduced magnitude to the original one; the epsilon avoids a
    # division by zero in silent bins.
    spectrum = librosa.stft(audio)
    magnitude = np.abs(spectrum)
    noise_floor = np.percentile(
        magnitude, noise_percentile, axis=1, keepdims=True
    )
    mask = np.maximum(magnitude - noise_scale * noise_floor, 0) / (
        magnitude + 1e-8
    )
    # The mask is real and non-negative, so scaling the complex spectrum by
    # it attenuates the magnitude while leaving the phase untouched.
    audio_clean = librosa.istft(spectrum * mask, length=len(audio))

    # 2. Pre-emphasis: the first sample has no predecessor and is kept
    # as-is.
    audio_pre = np.append(
        audio_clean[0], audio_clean[1:] - pre_emphasis * audio_clean[:-1]
    )

    # 3. Peak normalization, skipped for an all-zero signal to avoid
    # dividing by zero.
    max_val = np.max(np.abs(audio_pre))
    if max_val > 0:
        return audio_pre / max_val
    return audio_pre