File size: 908 Bytes
f317798
 
 
 
 
221d807
 
 
 
f317798
 
221d807
f317798
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import numpy as np
import librosa

def advanced_preprocess(
    audio_data,
    sr,
    *,
    noise_percentile=10,
    noise_scale=1.5,
    pre_emphasis=0.97,
):
    """Denoise, pre-emphasize and peak-normalize a mono audio signal.

    Applies the same preprocessing used in the advanced training pipeline.
    The keyword defaults are the constants that pipeline uses; override them
    only if the training side is changed to match.

    Steps:
        1. Spectral-gating noise reduction: a noise floor is estimated per
           STFT row as the ``noise_percentile``-th percentile of the
           magnitude along axis 1, scaled by ``noise_scale`` and subtracted
           (clamped at zero) to build a soft mask.
        2. Pre-emphasis filter: ``y[n] = x[n] - pre_emphasis * x[n - 1]``,
           with the first sample passed through unchanged.
        3. Peak normalization to a maximum absolute value of 1.

    Args:
        audio_data: One-dimensional array-like of audio samples.
        sr: Sample rate. Not read by any step; kept so existing callers
            keep working.
        noise_percentile: Percentile (0-100) of the magnitude used as the
            noise-floor estimate.
        noise_scale: Multiplier applied to the noise floor before it is
            subtracted from the magnitude.
        pre_emphasis: Coefficient of the first-order pre-emphasis filter.

    Returns:
        A 1-D ``numpy.ndarray`` with the same number of samples as the
        input. The result is scaled so its peak magnitude is 1, unless the
        processed signal is all zeros, in which case it is returned as-is.
        An empty input yields an empty array.

    Raises:
        ValueError: If ``audio_data`` is not one-dimensional.
    """
    samples = np.asarray(audio_data)
    if samples.ndim != 1:
        # The percentile axis, the istft length and the pre-emphasis slicing
        # below all assume a single channel; anything else would silently
        # produce a wrongly-shaped result.
        raise ValueError(
            "advanced_preprocess expects a 1-D mono signal, "
            f"got an array with shape {samples.shape}"
        )
    if samples.size == 0:
        # Nothing to process; indexing sample 0 and np.max would both fail.
        return samples.copy()

    # 1. Noise reduction
    spectrum = librosa.stft(samples)
    magnitude, phase = np.abs(spectrum), np.angle(spectrum)
    noise_floor = np.percentile(
        magnitude, noise_percentile, axis=1, keepdims=True
    )
    # The epsilon keeps the division finite where the magnitude is zero.
    mask = np.maximum(magnitude - noise_scale * noise_floor, 0) / (magnitude + 1e-8)
    spectrum_clean = magnitude * mask * np.exp(1j * phase)
    # length= trims/pads the reconstruction back to the input sample count.
    audio_clean = librosa.istft(spectrum_clean, length=len(samples))

    # 2. Pre-emphasis
    audio_pre = np.append(
        audio_clean[0], audio_clean[1:] - pre_emphasis * audio_clean[:-1]
    )

    # 3. Normalization
    peak = np.max(np.abs(audio_pre))
    if peak > 0:
        return audio_pre / peak
    # All-zero signal: dividing would produce NaNs, so return it unchanged.
    return audio_pre