Speech_recognition / audio_preprocessing.py
Pratik333's picture
Upload 9 files
4935b2c verified
"""
Lightweight audio preprocessing for speaker recognition.
MINIMAL processing to preserve voice characteristics.
"""
import numpy as np
import librosa
from scipy import signal
import warnings
warnings.filterwarnings('ignore')
try:
import noisereduce as nr
HAS_NOISEREDUCE = True
except ImportError:
HAS_NOISEREDUCE = False
print("⚠️ Install noisereduce for better results: pip install noisereduce")
class AudioPreprocessor:
    """Lightweight preprocessing that preserves voice characteristics.

    The pipeline is intentionally minimal: mono down-mix, optional
    resampling to ``sample_rate``, DC-offset removal, peak normalization
    and (in ``'standard'`` mode only) a gentle stationary noise reduction.
    """

    def __init__(self, sample_rate=16000):
        """Store the target sample rate (Hz) every clip is converted to."""
        self.sample_rate = sample_rate

    def process(self, audio, sr=None, mode='light'):
        """
        Minimal preprocessing pipeline.

        Args:
            audio: 1-D mono samples, or a 2-D multi-channel array. For 2-D
                input the shorter axis is treated as the channel axis, so
                both ``(samples, channels)`` and ``(channels, samples)``
                layouts are accepted. Lists and integer arrays are
                converted to floating point.
            sr: sample rate of ``audio``; defaults to ``self.sample_rate``.
            mode: 'light' for enrollment, 'standard' for identification
                (adds light noise reduction when ``noisereduce`` is
                installed and the clip is longer than half a second).

        Returns:
            preprocessed mono audio (numpy array) at ``self.sample_rate``.
            Empty input is returned as an empty 1-D array.
        """
        if sr is None:
            sr = self.sample_rate

        audio = np.asarray(audio)
        # The resampler and the in-place math below need floating-point
        # samples; float64 matches what the mean subtraction would produce
        # for integer input anyway.
        if not np.issubdtype(audio.dtype, np.floating):
            audio = audio.astype(np.float64)

        # Nothing to process; also avoids reductions over empty arrays.
        if audio.size == 0:
            return audio.reshape(-1)

        # Step 1: Convert to mono if multi-channel. This must happen BEFORE
        # resampling, because the resampler works along the last axis and
        # would otherwise resample across channels for (samples, channels).
        if audio.ndim > 1:
            rows_are_channels = (
                audio.ndim == 2 and audio.shape[0] < audio.shape[1]
            )
            audio = audio.mean(axis=0 if rows_are_channels else 1)

        # Step 2: Resample if needed
        if sr != self.sample_rate:
            audio = librosa.resample(
                audio, orig_sr=sr, target_sr=self.sample_rate
            )
            sr = self.sample_rate

        # Step 3: Remove DC offset
        audio = audio - np.mean(audio)

        # Step 4: Normalize amplitude
        audio = self._normalize(audio)

        # Step 5: Light noise reduction ONLY if mode is standard and the
        # clip is long enough (> 0.5 s) for a noise estimate.
        if mode == 'standard' and HAS_NOISEREDUCE and len(audio) > sr * 0.5:
            audio = self._reduce_noise_light(audio, sr)

        # Step 6: Final normalization (a no-op unless step 5 changed the
        # signal, since the peak is already 1.0).
        audio = self._normalize(audio)
        return audio

    def _normalize(self, audio):
        """Scale audio so its peak absolute value is 1 (range [-1, 1]).

        Empty and all-zero (silent) input is returned unchanged.
        """
        # max() of a zero-size array raises ValueError, so guard it.
        if np.size(audio) == 0:
            return audio
        peak = np.abs(audio).max()
        if peak > 0:
            audio = audio / peak
        return audio

    def _reduce_noise_light(self, audio, sr):
        """LIGHT noise reduction - preserves voice characteristics.

        Best-effort: if the noise reducer fails for any reason the
        original audio is returned and a warning is printed.
        """
        try:
            reduced = nr.reduce_noise(
                y=audio,
                sr=sr,
                stationary=True,
                prop_decrease=0.5,  # Only 50% reduction (was 1.0 = 100%)
                freq_mask_smooth_hz=1000,
                time_mask_smooth_ms=100
            )
            return reduced
        except Exception as e:
            print(f"⚠️ Noise reduction skipped: {e}")
            return audio
# Module-wide preprocessor, built lazily on first request
_preprocessor = None


def get_preprocessor():
    """Return the shared AudioPreprocessor, constructing it on first use."""
    global _preprocessor
    if _preprocessor is not None:
        return _preprocessor
    # First call: build the instance with its default settings and cache it.
    _preprocessor = AudioPreprocessor()
    return _preprocessor
def preprocess_audio(audio, sr=16000, for_enrollment=False):
    """
    Run the shared preprocessor over a clip in one call.

    Args:
        audio: numpy array of samples
        sr: sample rate of ``audio``
        for_enrollment: True selects the 'light' mode (preserves voice
            characteristics); False selects the 'standard' mode used for
            identification, which adds light noise reduction.

    Returns:
        preprocessed audio
    """
    # Enrollment keeps processing to a minimum; identification uses the
    # standard pipeline.
    chosen_mode = 'light' if for_enrollment else 'standard'
    return get_preprocessor().process(audio, sr, mode=chosen_mode)