Spaces:
Sleeping
Sleeping
| # Standard library imports | |
| import os | |
| from typing import Annotated | |
| # Related third-party imports | |
| import librosa | |
| import soundfile as sf | |
| from librosa.feature import rms | |
| from omegaconf import OmegaConf | |
| from noisereduce import reduce_noise | |
| from MPSENet import MPSENet | |
| # Local imports | |
| from src.utils.utils import Logger | |
class Denoiser:
    """
    A class to handle audio denoising using librosa and noisereduce.

    The class loads an audio file, measures its level as the mean of
    librosa's RMS feature, and, when that level reaches a caller-supplied
    threshold, runs ``noisereduce.reduce_noise`` and writes the result to
    disk as ``denoised.wav``.

    Parameters
    ----------
    config_path : str
        Path to the configuration file that specifies runtime settings.
    output_dir : str, optional
        Default directory to save cleaned audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Default directory to save cleaned audio files. Used by
        ``denoise_audio`` when it is called with an empty ``output_dir``.
    logger : Logger
        Logger instance for recording messages.
    """

    def __init__(self, config_path: Annotated[str, "Path to the config file"],
                 output_dir: Annotated[str, "Default directory to save cleaned audio files"] = ".temp") -> None:
        """
        Initialize the Denoiser class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file that specifies runtime settings.
        output_dir : str, optional
            Default directory to save cleaned audio files. Defaults to ".temp".
            The directory is created if it does not exist.
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)
        self.logger = Logger(name="DenoiserLogger")

    def _prepare_output_path(self, output_dir):
        """Resolve the target directory, create it, and return the ``denoised.wav`` path inside it."""
        # An empty (or None) directory used to reach os.makedirs("") and crash;
        # fall back to the default directory configured on the instance.
        target_dir = output_dir or self.output_dir
        output_path = os.path.join(target_dir, "denoised.wav")
        os.makedirs(target_dir, exist_ok=True)
        return output_path

    def denoise_audio(
            self,
            input_path: Annotated[str, "Path to the noisy audio file"],
            output_dir: Annotated[str, "Directory to save the cleaned audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if denoising is needed"],
            print_output: Annotated[bool, "Whether to log the process to console"] = False,
    ) -> str:
        """
        Denoise an audio file using noisereduce and librosa.

        Parameters
        ----------
        input_path : str
            Path to the noisy input audio file.
        output_dir : str
            Directory to save the cleaned audio file. When empty, the
            instance's default ``output_dir`` is used instead.
        noise_threshold : float
            Noise threshold value to decide if denoising is needed. It is
            compared against the mean RMS of the loaded waveform.
        print_output : bool, optional
            Whether to log the process to the console. Defaults to False.

        Returns
        -------
        str
            Path to the saved audio file if denoising is performed, otherwise the original audio file path.

        Examples
        --------
        >>> denoiser = Denoiser("config.yaml")
        >>> input_file = "noisy_audio.wav"
        >>> output_directory = "cleaned_audio"
        >>> noise_thresh = 0.02
        >>> result = denoiser.denoise_audio(input_file, output_directory, noise_thresh)
        >>> print(result)
        cleaned_audio/denoised.wav
        """
        self.logger.log(f"Loading: {input_path}", print_output=print_output)

        # sr=None keeps the file's native sampling rate.
        noisy_waveform, sr = librosa.load(input_path, sr=None)

        noise_level = rms(y=noisy_waveform).mean()
        self.logger.log(f"Calculated noise level: {noise_level}", print_output=print_output)

        if noise_level < noise_threshold:
            self.logger.log("Noise level is below the threshold. Skipping denoising.", print_output=print_output)
            return input_path

        self.logger.log("Denoising process started...", print_output=print_output)
        cleaned_waveform = reduce_noise(y=noisy_waveform, sr=sr)

        # The file name is fixed, so repeated calls with the same directory
        # overwrite the previous result.
        output_path = self._prepare_output_path(output_dir)
        sf.write(output_path, cleaned_waveform, sr)

        self.logger.log(f"Denoising completed! Cleaned file: {output_path}", print_output=print_output)
        return output_path
class SpeechEnhancement:
    """
    A class for speech enhancement using the MPSENet model.

    This class provides methods to load audio, apply enhancement using a
    pre-trained MPSENet model, and save the enhanced output.

    Parameters
    ----------
    config_path : str
        Path to the configuration file specifying runtime settings.
    output_dir : str, optional
        Directory to save enhanced audio files. Defaults to ".temp".

    Attributes
    ----------
    config : omegaconf.DictConfig
        Loaded configuration data.
    output_dir : str
        Directory to save enhanced audio files.
    model_name : str
        Name of the pre-trained model, read from ``models.mpsenet.model_name``.
    device : str
        Device to run the model (e.g., "cpu" or "cuda"), read from
        ``runtime.device``.
    model : MPSENet
        Pre-trained MPSENet model instance.
    """

    def __init__(
            self,
            config_path: Annotated[str, "Path to the config file"],
            output_dir: Annotated[str, "Default directory to save enhanced audio files"] = ".temp"
    ) -> None:
        """
        Initialize the SpeechEnhancement class.

        Parameters
        ----------
        config_path : str
            Path to the configuration file specifying runtime settings.
        output_dir : str, optional
            Directory to save enhanced audio files. Defaults to ".temp".
            The directory is created if it does not exist.
        """
        self.config = OmegaConf.load(config_path)
        self.output_dir = output_dir
        os.makedirs(self.output_dir, exist_ok=True)

        self.model_name = self.config.models.mpsenet.model_name
        self.device = self.config.runtime.device
        self.model = MPSENet.from_pretrained(self.model_name).to(self.device)

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path``, if ``path`` has one."""
        # os.path.dirname("file.wav") is "", and os.makedirs("") raises
        # FileNotFoundError, so a bare file name must skip directory creation.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def enhance_audio(
            self,
            input_path: Annotated[str, "Path to the original audio file"],
            output_path: Annotated[str, "Path to save the enhanced audio file"],
            noise_threshold: Annotated[float, "Noise threshold value to decide if enhancement is needed"],
            verbose: Annotated[bool, "Whether to log additional info to console"] = False,
    ) -> str:
        """
        Enhance an audio file using the MPSENet model.

        Parameters
        ----------
        input_path : str
            Path to the original input audio file.
        output_path : str
            Path to save the enhanced audio file. May be a bare file name,
            in which case it is written to the current working directory.
        noise_threshold : float
            Noise threshold value to decide if enhancement is needed. It is
            compared against the mean RMS of the loaded waveform.
        verbose : bool, optional
            Whether to log additional info to the console. Defaults to False.

        Returns
        -------
        str
            Path to the enhanced audio file if enhancement is performed, otherwise the original file path.

        Examples
        --------
        >>> enhancer = SpeechEnhancement("config.yaml")
        >>> input_file = "raw_audio.wav"
        >>> output_file = "enhanced_audio.wav"
        >>> noise_thresh = 0.03
        >>> result = enhancer.enhance_audio(input_file, output_file, noise_thresh)
        >>> print(result)
        enhanced_audio.wav
        """
        # sr=None keeps the file's native sampling rate.
        raw_waveform, sr_raw = librosa.load(input_path, sr=None)
        noise_level = rms(y=raw_waveform).mean()

        if verbose:
            print(f"[SpeechEnhancement] Detected noise level: {noise_level:.6f}")

        if noise_level < noise_threshold:
            if verbose:
                print(f"[SpeechEnhancement] Noise level < {noise_threshold} → enhancement skipped.")
            return input_path

        # Bring the already-decoded waveform to the model's sampling rate
        # instead of decoding the file from disk a second time.
        sr_model = self.model.h.sampling_rate
        if sr_raw != sr_model:
            waveform = librosa.resample(raw_waveform, orig_sr=sr_raw, target_sr=sr_model)
        else:
            waveform = raw_waveform

        if verbose:
            print(f"[SpeechEnhancement] Enhancement with MPSENet started using model: {self.model_name}")

        # The model call yields three values; the third is not used here.
        enhanced_waveform, sr_out, _ = self.model(waveform)

        self._ensure_parent_dir(output_path)
        sf.write(output_path, enhanced_waveform, sr_out)

        if verbose:
            print(f"[SpeechEnhancement] Enhancement complete. Saved to: {output_path}")

        return output_path
if __name__ == "__main__":
    # Manual smoke run: denoise the bundled example, then feed whatever path
    # the denoiser returned into the speech enhancer.
    config_file = "config/config.yaml"
    source_audio = ".data/example/noisy/LookOncetoHearTargetSpeechHearingwithNoisyExamples.mp3"
    work_dir = ".temp"
    threshold = 0.005  # same threshold for both stages

    cleaner = Denoiser(config_path=config_file, output_dir=work_dir)
    cleaned_path = cleaner.denoise_audio(
        input_path=source_audio,
        output_dir=work_dir,
        noise_threshold=threshold,
        print_output=True,
    )

    # denoise_audio returns its input path unchanged when it skips the work.
    print(
        "Denoising skipped due to low noise level."
        if cleaned_path == source_audio
        else f"Denoising completed! Cleaned file saved at: {cleaned_path}"
    )

    enhancer = SpeechEnhancement(config_path=config_file, output_dir=work_dir)
    enhanced_target = os.path.join(work_dir, "enhanced_audio.wav")
    final_path = enhancer.enhance_audio(
        input_path=cleaned_path,
        output_path=enhanced_target,
        noise_threshold=threshold,
        verbose=True,
    )

    # enhance_audio likewise returns its input path when enhancement is skipped.
    print(
        "Enhancement skipped due to low noise level."
        if final_path == cleaned_path
        else f"Speech enhancement completed! Enhanced file saved at: {final_path}"
    )