# sample-noise / signal_processing.py
# Kumar Shubham
# Initial push
# b7becdf
import numpy as np
import librosa
import soundfile as sf
import os
from scipy import signal
class SignalProcessor:
    """
    Class for specialized signal processing operations to enhance engine sound
    classification, with particular focus on improving valve lash detection.

    Every method works at the sample rate given to the constructor (``self.sr``).
    """

    # Upper bound for the normalized (Nyquist == 1.0) band edge handed to
    # scipy.signal.butter, which requires 0 < Wn < 1 for digital filters.
    _MAX_NORMALIZED_CUTOFF = 0.99

    def __init__(self, sr=22050):
        """
        Initialize the signal processor.

        Args:
            sr (int): Sample rate for processing
        """
        self.sr = sr

    @staticmethod
    def _peak_normalize(samples):
        """
        Scale a signal so that its largest absolute value is 1.

        Args:
            samples (np.array): Signal to scale

        Returns:
            np.array: Scaled signal; empty or all-zero input is returned unchanged
        """
        if np.size(samples) == 0:
            return samples
        peak = np.max(np.abs(samples))
        if peak > 0:
            return samples / peak
        return samples

    def bandpass_filter(self, audio, low_freq=500, high_freq=4000):
        """
        Apply a bandpass filter to focus on frequencies typical for valve lash issues.

        A 4th-order Butterworth band-pass is applied forwards and backwards
        (``filtfilt``), so the result has no phase shift.

        Args:
            audio (np.array): Audio signal
            low_freq (int): Lower cutoff frequency in Hz
            high_freq (int): Upper cutoff frequency in Hz

        Returns:
            np.array: Filtered audio signal

        Raises:
            ValueError: If the band is empty or not strictly positive once the
                upper edge has been limited to just below the Nyquist frequency
        """
        nyquist = 0.5 * self.sr
        low = low_freq / nyquist
        # A cutoff at or above Nyquist cannot be realised; clamp it instead of
        # failing so low-sample-rate processors can still use the defaults.
        high = min(high_freq / nyquist, self._MAX_NORMALIZED_CUTOFF)
        if not 0 < low < high:
            raise ValueError(
                f"Invalid pass band {low_freq}-{high_freq} Hz "
                f"for sample rate {self.sr} Hz"
            )

        b, a = signal.butter(4, [low, high], btype='band')
        return signal.filtfilt(b, a, audio)

    def enhance_transients(self, audio, threshold=0.1, boost_factor=2.0):
        """
        Enhance transient sounds which are characteristic of valve lash issues.

        Samples where the Hilbert envelope rises by more than ``threshold`` from
        one sample to the next are marked, the marks are smoothed with a 10 ms
        Hann window, and the marked regions are amplified by up to
        ``boost_factor``. The result is peak-normalized. The input array is not
        modified.

        Args:
            audio (np.array): Audio signal
            threshold (float): Threshold for detecting transients
            boost_factor (float): Factor to boost transients

        Returns:
            np.array: Audio with enhanced transients
        """
        audio = np.asarray(audio)
        if not np.issubdtype(audio.dtype, np.floating):
            # The in-place boost below cannot store fractional gains in an
            # integer array.
            audio = audio.astype(float)

        # Compute the envelope
        envelope = np.abs(signal.hilbert(audio))

        # Derivative of the envelope. The first sample has no predecessor, so
        # repeat it: its derivative is then 0 rather than a jump from silence.
        envelope_diff = np.diff(envelope, prepend=envelope[:1])

        # Mark transients, then smooth the marks over ~10 ms
        transient_mask = (envelope_diff > threshold).astype(float)
        window = signal.windows.hann(max(1, int(0.01 * self.sr)))
        transient_mask = signal.convolve(transient_mask, window, mode='same')
        # Overlapping windows from consecutive marked samples add up; clip so
        # the gain applied below never exceeds boost_factor.
        transient_mask = np.clip(transient_mask, 0.0, 1.0)

        # Boost the transients
        enhanced_audio = audio.copy()
        enhanced_audio += audio * transient_mask * (boost_factor - 1)

        return self._peak_normalize(enhanced_audio)

    def harmonic_percussive_separation(self, audio, margin=3.0,
                                       harmonic_weight=0.3, percussive_weight=1.7):
        """
        Separate harmonic and percussive components, emphasizing the percussive
        elements that are often present in valve lash issues.

        Args:
            audio (np.array): Audio signal
            margin (float): Margin for separation
            harmonic_weight (float): Weight of the harmonic component in the mix
            percussive_weight (float): Weight of the percussive component in the mix

        Returns:
            np.array: Enhanced audio focusing on percussive elements
        """
        harmonic, percussive = librosa.effects.hpss(audio, margin=margin)
        # Re-mix with the percussive part dominating (default weights)
        enhanced = harmonic * harmonic_weight + percussive * percussive_weight
        return self._peak_normalize(enhanced)

    def spectral_contrast_enhancement(self, audio, n_bands=6, boost=2.0):
        """
        Enhance spectral contrast to make valve lash sounds more distinguishable.

        This is a simplified approach: each STFT frame is scaled by
        ``1 + mean(boost * spectral_contrast)`` for that frame, keeping the
        original phase. The output has the same number of samples as the input.

        Args:
            audio (np.array): Audio signal
            n_bands (int): Number of frequency bands
            boost (float): Boost factor for contrast

        Returns:
            np.array: Audio with enhanced spectral contrast
        """
        # Compute spectrogram
        S = librosa.stft(audio)

        # Compute spectral contrast and the per-frame gain derived from it
        contrast = librosa.feature.spectral_contrast(
            S=np.abs(S), sr=self.sr, n_bands=n_bands
        )
        frame_gain = 1 + np.mean(contrast * boost, axis=0, keepdims=True)

        # Scaling the complex STFT by a real gain changes the magnitude and
        # leaves the phase untouched. Pass `length` so the inverse transform
        # returns exactly as many samples as were given.
        y_enhanced = librosa.istft(S * frame_gain, length=np.shape(audio)[-1])

        return self._peak_normalize(y_enhanced)

    def process_valve_lash_audio(self, audio_path, output_path=None):
        """
        Apply a combination of processing techniques specifically designed
        for valve lash issue detection.

        The steps are, in order: band-pass filter (800-5000 Hz), transient
        enhancement, harmonic-percussive re-mix, spectral contrast enhancement.

        Args:
            audio_path (str): Path to the audio file
            output_path (str, optional): Path to save the processed audio

        Returns:
            np.array: Processed audio signal
        """
        # Load audio, resampled to the processor's sample rate
        audio, _ = librosa.load(audio_path, sr=self.sr)

        audio = self.bandpass_filter(audio, low_freq=800, high_freq=5000)
        audio = self.enhance_transients(audio, threshold=0.05, boost_factor=2.5)
        audio = self.harmonic_percussive_separation(audio, margin=4.0)
        audio = self.spectral_contrast_enhancement(audio, n_bands=6, boost=2.0)

        # Save processed audio if output path is provided
        if output_path:
            sf.write(output_path, audio, self.sr)
        return audio

    def process_valve_lash_dataset(self, input_dir, output_dir):
        """
        Process all valve lash audio files in a directory.

        Every entry of ``input_dir`` whose name ends in ``.wav`` (any letter
        case) is processed, in sorted name order, and written to ``output_dir``
        as ``enhanced_<original name>``.

        Args:
            input_dir (str): Directory containing valve lash audio files
            output_dir (str): Directory to save processed files
        """
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Sorted so the processing order does not depend on the file system
        for filename in sorted(os.listdir(input_dir)):
            if not filename.lower().endswith('.wav'):
                continue
            input_path = os.path.join(input_dir, filename)
            output_path = os.path.join(output_dir, f"enhanced_{filename}")
            print(f"Processing {filename}...")
            self.process_valve_lash_audio(input_path, output_path)

        print(f"All files processed and saved to {output_dir}")

    def extract_valve_lash_features(self, audio):
        """
        Extract the 41-value feature vector used for classification.

        The vector is laid out as: 13 MFCC means, 13 MFCC standard deviations,
        mean spectral centroid, mean spectral rolloff, 12 chroma means and the
        mean zero-crossing rate. Its length is kept at 41 to match the standard
        feature extraction, so no additional valve-lash-specific descriptors
        are computed or appended.

        Args:
            audio (np.array): Audio signal

        Returns:
            np.array: Extracted features (length 41)
        """
        mfccs = librosa.feature.mfcc(y=audio, sr=self.sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)
        chroma = librosa.feature.chroma_stft(y=audio, sr=self.sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)

        # 13 + 13 + 1 + 1 + 12 + 1 = 41
        features = np.concatenate([
            mfccs.mean(axis=1),                           # 13 features
            mfccs.std(axis=1),                            # 13 features
            spectral_centroid.mean(axis=1),               # 1 feature
            spectral_rolloff.mean(axis=1),                # 1 feature
            chroma.mean(axis=1),                          # 12 features
            zero_crossing_rate.mean(axis=1).reshape(-1),  # 1 feature
        ])

        # Internal invariant: the layout above must stay 41 values long
        assert len(features) == 41, f"Feature length mismatch: {len(features)} != 41"
        return features
def enhance_valve_lash_dataset(input_dir, output_dir, sr=22050):
    """
    Convenience wrapper for enhancing a directory of valve lash audio files.

    Builds a SignalProcessor at the requested sample rate and hands both
    directories to its process_valve_lash_dataset method.

    Args:
        input_dir (str): Directory containing valve lash audio files
        output_dir (str): Directory to save processed files
        sr (int): Sample rate
    """
    SignalProcessor(sr=sr).process_valve_lash_dataset(input_dir, output_dir)