KasaHealth / utils /audio_preprocessor.py
78anand's picture
Upload folder using huggingface_hub
221d807 verified
raw
history blame contribute delete
908 Bytes
import numpy as np
import librosa
def advanced_preprocess(audio_data, sr):
    """Denoise, pre-emphasize and peak-normalize an audio signal.

    Intended to apply the same preprocessing as the advanced training
    pipeline, in three stages:

    1. Spectral-gating noise reduction in the STFT domain.
    2. First-order pre-emphasis filter (coefficient 0.97).
    3. Peak normalization.

    Args:
        audio_data: Audio samples handed to ``librosa.stft``; its ``len()``
            fixes the length of the reconstructed signal.
            (Presumably a 1-D float array -- confirm against callers.)
        sr: Sampling rate. Accepted as part of the interface but not used
            by any step in this function.

    Returns:
        The processed signal divided by its peak absolute value when that
        peak is greater than zero; otherwise the un-normalized signal.
    """
    # 1. Noise reduction: work on magnitude and phase separately.
    spectrum = librosa.stft(audio_data)
    mag = np.abs(spectrum)
    phase = np.angle(spectrum)

    # Noise estimate: 10th percentile of the magnitude along axis 1
    # (the frame axis for mono input -- TODO confirm input is always mono).
    floor = np.percentile(mag, 10, axis=1, keepdims=True)

    # Soft gate: subtract 1.5x the noise estimate, clip at zero, and express
    # the result as a ratio of the original magnitude (1e-8 avoids 0/0).
    gate = np.maximum(mag - 1.5 * floor, 0) / (mag + 1e-8)

    # Re-attach the original phase and return to the time domain.
    gated_spectrum = mag * gate * np.exp(1j * phase)
    denoised = librosa.istft(gated_spectrum, length=len(audio_data))

    # 2. Pre-emphasis: y[0] = x[0], y[n] = x[n] - 0.97 * x[n - 1].
    first_sample = denoised[0]
    differenced = denoised[1:] - 0.97 * denoised[:-1]
    emphasized = np.append(first_sample, differenced)

    # 3. Normalization: scale by the peak unless it is not strictly positive.
    peak = np.max(np.abs(emphasized))
    if not peak > 0:
        return emphasized
    return emphasized / peak