Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import librosa | |
| import soundfile as sf | |
| import os | |
| from scipy import signal | |
class SignalProcessor:
    """
    Specialized signal processing operations to enhance engine sound
    classification, with particular focus on improving valve lash detection.

    All array-based methods are written for mono, one-dimensional signals
    sampled at ``self.sr``.
    """

    def __init__(self, sr=22050):
        """
        Initialize the signal processor.

        Args:
            sr (int): Sample rate for processing
        """
        self.sr = sr

    @staticmethod
    def _peak_normalize(audio):
        """Scale ``audio`` to a peak magnitude of 1; empty or all-zero input is returned as is."""
        if audio.size == 0:
            return audio
        peak = np.max(np.abs(audio))
        if peak > 0:
            return audio / peak
        return audio

    def bandpass_filter(self, audio, low_freq=500, high_freq=4000):
        """
        Apply a zero-phase 4th-order Butterworth bandpass filter to focus on
        frequencies typical for valve lash issues.

        Args:
            audio (np.array): Audio signal
            low_freq (int): Lower cutoff frequency in Hz
            high_freq (int): Upper cutoff frequency in Hz

        Returns:
            np.array: Filtered audio signal

        Raises:
            ValueError: If the band edges do not satisfy
                ``0 < low_freq < high_freq < sr / 2``, or (raised by SciPy)
                if the signal is too short for forward-backward filtering.
        """
        nyquist = 0.5 * self.sr
        # Fail early with an actionable message instead of SciPy's generic
        # "critical frequencies must be 0 < Wn < 1" error.
        if not 0 < low_freq < high_freq < nyquist:
            raise ValueError(
                f"Band edges must satisfy 0 < low_freq < high_freq < {nyquist:g} Hz "
                f"(Nyquist for sr={self.sr}); got low_freq={low_freq}, "
                f"high_freq={high_freq}"
            )

        # Second-order sections are numerically more robust than the (b, a)
        # polynomial form, which SciPy's documentation advises against for
        # filtering.
        sos = signal.butter(
            4,
            [low_freq / nyquist, high_freq / nyquist],
            btype='band',
            output='sos',
        )
        return signal.sosfiltfilt(sos, audio)

    def enhance_transients(self, audio, threshold=0.1, boost_factor=2.0):
        """
        Enhance transient sounds which are characteristic of valve lash issues.

        Samples where the Hilbert envelope rises by more than ``threshold``
        from one sample to the next are marked, the marks are smoothed with a
        10 ms Hann window, and the marked regions are amplified by at most
        ``boost_factor`` before the result is peak-normalized.

        Args:
            audio (np.array): Audio signal
            threshold (float): Threshold for detecting transients
            boost_factor (float): Factor to boost transients

        Returns:
            np.array: Audio with enhanced transients (floating point; the
            input's float dtype is kept, integer input becomes float64)
        """
        audio = np.asarray(audio)
        # The gain below is fractional, so integer PCM must be promoted first.
        if not np.issubdtype(audio.dtype, np.inexact):
            audio = audio.astype(np.float64)
        if audio.size == 0:
            return audio.copy()

        # Amplitude envelope and its sample-to-sample rise.
        envelope = np.abs(signal.hilbert(audio))
        rise = np.diff(envelope, prepend=0)

        # Binary transient marks, smoothed over ~10 ms (at least one sample).
        marks = (rise > threshold).astype(np.float64)
        window = signal.windows.hann(max(1, int(0.01 * self.sr)))
        smoothed = signal.convolve(marks, window, mode='same')

        # The Hann window is not unit-sum, so runs of adjacent marks add up
        # to far more than 1. Clip so the applied gain never exceeds
        # boost_factor.
        mask = np.clip(smoothed, 0.0, 1.0)

        gain = 1.0 + mask * (boost_factor - 1)
        boosted = (audio * gain).astype(audio.dtype, copy=False)
        return self._peak_normalize(boosted)

    def harmonic_percussive_separation(self, audio, margin=3.0):
        """
        Separate harmonic and percussive components, emphasizing the percussive
        elements that are often present in valve lash issues.

        Args:
            audio (np.array): Audio signal
            margin (float): Margin for separation

        Returns:
            np.array: Peak-normalized mix of 0.3 x harmonic + 1.7 x percussive
        """
        harmonic, percussive = librosa.effects.hpss(audio, margin=margin)
        # Re-mix with the percussive part emphasized for valve lash detection.
        return self._peak_normalize(harmonic * 0.3 + percussive * 1.7)

    def spectral_contrast_enhancement(self, audio, n_bands=6, boost=2.0):
        """
        Enhance spectral contrast to make valve lash sounds more distinguishable.

        This is a simplified approximation: each STFT frame is scaled by
        ``1 + boost * mean(spectral contrast of that frame)``, i.e. a
        per-frame gain rather than a per-band modification of the spectrum.

        Args:
            audio (np.array): Audio signal
            n_bands (int): Number of frequency bands
            boost (float): Boost factor for contrast

        Returns:
            np.array: Peak-normalized audio with the same number of samples
            as the input
        """
        spectrum = librosa.stft(audio)
        contrast = librosa.feature.spectral_contrast(
            S=np.abs(spectrum), sr=self.sr, n_bands=n_bands
        )

        # One gain value per frame, broadcast over all frequency bins.
        frame_gain = 1 + np.mean(contrast * boost, axis=0, keepdims=True)

        # Scaling the complex spectrum by a real gain keeps the phase, so no
        # magnitude/phase split is needed. `length` pins the output to the
        # input's sample count, which the inverse STFT does not guarantee
        # by default.
        rebuilt = librosa.istft(spectrum * frame_gain, length=len(audio))
        return self._peak_normalize(rebuilt)

    def process_valve_lash_audio(self, audio_path, output_path=None):
        """
        Apply a combination of processing techniques specifically designed
        for valve lash issue detection.

        The chain is: bandpass (800-5000 Hz), transient enhancement,
        harmonic-percussive re-mix, spectral contrast enhancement.

        Args:
            audio_path (str): Path to the audio file
            output_path (str, optional): Path to save the processed audio

        Returns:
            np.array: Processed audio signal
        """
        # librosa resamples to self.sr on load; the returned rate is redundant.
        audio, _ = librosa.load(audio_path, sr=self.sr)

        audio = self.bandpass_filter(audio, low_freq=800, high_freq=5000)
        audio = self.enhance_transients(audio, threshold=0.05, boost_factor=2.5)
        audio = self.harmonic_percussive_separation(audio, margin=4.0)
        audio = self.spectral_contrast_enhancement(audio, n_bands=6, boost=2.0)

        if output_path:
            sf.write(output_path, audio, self.sr)
        return audio

    def process_valve_lash_dataset(self, input_dir, output_dir):
        """
        Process all valve lash audio files in a directory.

        Every ``.wav`` file (extension matched case-insensitively) directly
        inside ``input_dir`` is processed and written to ``output_dir`` as
        ``enhanced_<original name>``.

        Args:
            input_dir (str): Directory containing valve lash audio files
            output_dir (str): Directory to save processed files
        """
        os.makedirs(output_dir, exist_ok=True)

        # Sorted for a reproducible processing order; os.listdir's order is
        # arbitrary.
        for filename in sorted(os.listdir(input_dir)):
            if not filename.lower().endswith('.wav'):
                continue
            input_path = os.path.join(input_dir, filename)
            output_path = os.path.join(output_dir, f"enhanced_{filename}")
            print(f"Processing {filename}...")
            self.process_valve_lash_audio(input_path, output_path)

        print(f"All files processed and saved to {output_dir}")

    def _valve_lash_descriptors(self, audio):
        """Return 7 extra descriptors: flatness mean, onset mean/std/max, tempo, beat-interval mean/std."""
        # Spectral flatness - valve lash often has more noise-like components.
        flatness = librosa.feature.spectral_flatness(y=audio)

        # Onset strength - valve lash has distinctive onsets.
        onset_env = librosa.onset.onset_strength(y=audio, sr=self.sr)
        if onset_env.size:
            onset_stats = [
                float(np.mean(onset_env)),
                float(np.std(onset_env)),
                float(np.max(onset_env)),
            ]
        else:
            onset_stats = [0.0, 0.0, 0.0]

        # Rhythm - best effort: beat tracking may fail or find no beats, in
        # which case the rhythm descriptors fall back to zero.
        tempo = mean_interval = std_interval = 0.0
        try:
            raw_tempo, beats = librosa.beat.beat_track(y=audio, sr=self.sr)
            # Depending on the librosa version, tempo is a scalar or a
            # 1-element array.
            tempo = float(np.atleast_1d(raw_tempo)[0])
            if len(beats) > 1:
                intervals = np.diff(beats)
                mean_interval = float(np.mean(intervals))
                std_interval = float(np.std(intervals))
        except Exception:
            tempo = mean_interval = std_interval = 0.0

        return np.array(
            [float(np.mean(flatness)), *onset_stats, tempo, mean_interval, std_interval]
        )

    def extract_valve_lash_features(self, audio, *, include_valve_lash_descriptors=False):
        """
        Extract a feature vector for valve lash classification.

        By default the result is the 41-element standard vector:
        13 MFCC means + 13 MFCC standard deviations + spectral centroid mean
        + spectral rolloff mean + 12 chroma means + zero-crossing-rate mean.

        Args:
            audio (np.array): Audio signal
            include_valve_lash_descriptors (bool): When True, append 7
                valve-lash-oriented descriptors (spectral flatness mean,
                onset strength mean/std/max, tempo, beat-interval mean/std),
                giving 48 features. Off by default because the extra
                analysis (notably beat tracking) is costly and the standard
                extraction length is 41.

        Returns:
            np.array: Extracted features (41 values, or 48 with descriptors)
        """
        mfccs = librosa.feature.mfcc(y=audio, sr=self.sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)
        chroma = librosa.feature.chroma_stft(y=audio, sr=self.sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)

        features = np.concatenate([
            mfccs.mean(axis=1),                           # 13 features
            mfccs.std(axis=1),                            # 13 features
            spectral_centroid.mean(axis=1),               # 1 feature
            spectral_rolloff.mean(axis=1),                # 1 feature
            chroma.mean(axis=1),                          # 12 features
            zero_crossing_rate.mean(axis=1).reshape(-1),  # 1 feature
        ])

        # Internal invariant: the standard part must match the standard
        # extraction length.
        assert len(features) == 41, f"Feature length mismatch: {len(features)} != 41"

        if include_valve_lash_descriptors:
            features = np.concatenate([features, self._valve_lash_descriptors(audio)])
        return features
def enhance_valve_lash_dataset(input_dir, output_dir, sr=22050):
    """
    Enhance every valve lash recording in a directory with a single call.

    Builds a SignalProcessor at the requested sample rate and delegates the
    directory pair to its process_valve_lash_dataset method.

    Args:
        input_dir (str): Directory containing valve lash audio files
        output_dir (str): Directory to save processed files
        sr (int): Sample rate
    """
    SignalProcessor(sr=sr).process_valve_lash_dataset(input_dir, output_dir)