Spaces:

ArtCloud
/

Callytics

Sleeping

Callytics / src /audio /preprocessing.py

Zubik Aliaksandr

Initial Space snapshot (Docker SDK, Gradio UI, port 7860)

d60c606 17 days ago

9.25 kB

	# Standard library imports
	import os
	from typing import Annotated

	# Related third-party imports
	import librosa
	import soundfile as sf
	from librosa.feature import rms
	from omegaconf import OmegaConf
	from noisereduce import reduce_noise
	from MPSENet import MPSENet

	# Local imports
	from src.utils.utils import Logger


	class Denoiser:
	"""
	A class to handle audio denoising using librosa and noisereduce.

	This class provides methods to load noisy audio, apply denoising, and
	save the cleaned output to disk.

	Parameters
	----------
	config_path : str
	Path to the configuration file that specifies runtime settings.
	output_dir : str, optional
	Directory to save cleaned audio files. Defaults to ".temp".

	Attributes
	----------
	config : omegaconf.DictConfig
	Loaded configuration data.
	output_dir : str
	Directory to save cleaned audio files.
	logger : Logger
	Logger instance for recording messages.
	"""

	def __init__(self, config_path: Annotated[str, "Path to the config file"],
	output_dir: Annotated[str, "Default directory to save cleaned audio files"] = ".temp") -> None:
	"""
	Initialize the Denoiser class.

	Parameters
	----------
	config_path : str
	Path to the configuration file that specifies runtime settings.
	output_dir : str, optional
	Default directory to save cleaned audio files. Defaults to ".temp".
	"""
	self.config = OmegaConf.load(config_path)
	self.output_dir = output_dir
	os.makedirs(self.output_dir, exist_ok=True)
	self.logger = Logger(name="DenoiserLogger")

	def denoise_audio(
	self,
	input_path: Annotated[str, "Path to the noisy audio file"],
	output_dir: Annotated[str, "Directory to save the cleaned audio file"],
	noise_threshold: Annotated[float, "Noise threshold value to decide if denoising is needed"],
	print_output: Annotated[bool, "Whether to log the process to console"] = False,
	) -> str:
	"""
	Denoise an audio file using noisereduce and librosa.

	Parameters
	----------
	input_path : str
	Path to the noisy input audio file.
	output_dir : str
	Directory to save the cleaned audio file.
	noise_threshold : float
	Noise threshold value to decide if denoising is needed.
	print_output : bool, optional
	Whether to log the process to the console. Defaults to False.

	Returns
	-------
	str
	Path to the saved audio file if denoising is performed, otherwise the original audio file path.

	Examples
	--------
	>>> denoise = Denoiser("config.yaml")
	>>> input_file = "noisy_audio.wav"
	>>> output_directory = "cleaned_audio"
	>>> noise_thresh = 0.02
	>>> result = denoiser.denoise_audio(input_file, output_directory, noise_thresh)
	>>> print(result)
	cleaned_audio/denoised.wav
	"""
	self.logger.log(f"Loading: {input_path}", print_output=print_output)

	noisy_waveform, sr = librosa.load(input_path, sr=None)

	noise_level = rms(y=noisy_waveform).mean()
	self.logger.log(f"Calculated noise level: {noise_level}", print_output=print_output)

	if noise_level < noise_threshold:
	self.logger.log("Noise level is below the threshold. Skipping denoising.", print_output=print_output)
	return input_path

	self.logger.log("Denoising process started...", print_output=print_output)

	cleaned_waveform = reduce_noise(y=noisy_waveform, sr=sr)

	output_path = os.path.join(output_dir, "denoised.wav")

	os.makedirs(output_dir, exist_ok=True)

	sf.write(output_path, cleaned_waveform, sr)

	self.logger.log(f"Denoising completed! Cleaned file: {output_path}", print_output=print_output)

	return output_path


	class SpeechEnhancement:
	"""
	A class for speech enhancement using the MPSENet model.

	This class provides methods to load audio, apply enhancement using a
	pre-trained MPSENet model, and save the enhanced output.

	Parameters
	----------
	config_path : str
	Path to the configuration file specifying runtime settings.
	output_dir : str, optional
	Directory to save enhanced audio files. Defaults to ".temp".

	Attributes
	----------
	config : omegaconf.DictConfig
	Loaded configuration data.
	output_dir : str
	Directory to save enhanced audio files.
	model_name : str
	Name of the pre-trained model.
	device : str
	Device to run the model (e.g., "cpu" or "cuda").
	model : MPSENet
	Pre-trained MPSENet model instance.
	"""

	def __init__(
	self,
	config_path: Annotated[str, "Path to the config file"],
	output_dir: Annotated[str, "Default directory to save enhanced audio files"] = ".temp"
	) -> None:
	"""
	Initialize the SpeechEnhancement class.

	Parameters
	----------
	config_path : str
	Path to the configuration file specifying runtime settings.
	output_dir : str, optional
	Directory to save enhanced audio files. Defaults to ".temp".
	"""
	self.config = OmegaConf.load(config_path)
	self.output_dir = output_dir
	os.makedirs(self.output_dir, exist_ok=True)

	self.model_name = self.config.models.mpsenet.model_name
	self.device = self.config.runtime.device

	self.model = MPSENet.from_pretrained(self.model_name).to(self.device)

	def enhance_audio(
	self,
	input_path: Annotated[str, "Path to the original audio file"],
	output_path: Annotated[str, "Path to save the enhanced audio file"],
	noise_threshold: Annotated[float, "Noise threshold value to decide if enhancement is needed"],
	verbose: Annotated[bool, "Whether to log additional info to console"] = False,
	) -> str:
	"""
	Enhance an audio file using the MPSENet model.

	Parameters
	----------
	input_path : str
	Path to the original input audio file.
	output_path : str
	Path to save the enhanced audio file.
	noise_threshold : float
	Noise threshold value to decide if enhancement is needed.
	verbose : bool, optional
	Whether to log additional info to the console. Defaults to False.

	Returns
	-------
	str
	Path to the enhanced audio file if enhancement is performed, otherwise the original file path.

	Examples
	--------
	>>> enhancer = SpeechEnhancement("config.yaml")
	>>> input_file = "raw_audio.wav"
	>>> output_file = "enhanced_audio.wav"
	>>> noise_thresh = 0.03
	>>> result = enhancer.enhance_audio(input_file, output_file, noise_thresh)
	>>> print(result)
	enhanced_audio.wav
	"""
	raw_waveform, sr_raw = librosa.load(input_path, sr=None)
	noise_level = rms(y=raw_waveform).mean()

	if verbose:
	print(f"[SpeechEnhancement] Detected noise level: {noise_level:.6f}")

	if noise_level < noise_threshold:
	if verbose:
	print(f"[SpeechEnhancement] Noise level < {noise_threshold} → enhancement skipped.")
	return input_path

	sr_model = self.model.h.sampling_rate
	waveform, sr = librosa.load(input_path, sr=sr_model)

	if verbose:
	print(f"[SpeechEnhancement] Enhancement with MPSENet started using model: {self.model_name}")

	enhanced_waveform, sr_out, _ = self.model(waveform)

	os.makedirs(os.path.dirname(output_path), exist_ok=True)
	sf.write(output_path, enhanced_waveform, sr_out)

	if verbose:
	print(f"[SpeechEnhancement] Enhancement complete. Saved to: {output_path}")

	return output_path


	if __name__ == "__main__":

	test_config_path = "config/config.yaml"
	noisy_audio_file = ".data/example/noisy/LookOncetoHearTargetSpeechHearingwithNoisyExamples.mp3"
	temp_dir = ".temp"

	denoiser = Denoiser(config_path=test_config_path, output_dir=temp_dir)
	denoised_path = denoiser.denoise_audio(
	input_path=noisy_audio_file,
	output_dir=temp_dir,
	noise_threshold=0.005,
	print_output=True
	)
	if denoised_path == noisy_audio_file:
	print("Denoising skipped due to low noise level.")
	else:
	print(f"Denoising completed! Cleaned file saved at: {denoised_path}")

	speech_enhancer = SpeechEnhancement(config_path=test_config_path, output_dir=temp_dir)
	enhanced_audio_path = os.path.join(temp_dir, "enhanced_audio.wav")

	result_path = speech_enhancer.enhance_audio(
	input_path=denoised_path,
	output_path=enhanced_audio_path,
	noise_threshold=0.005,
	verbose=True
	)

	if result_path == denoised_path:
	print("Enhancement skipped due to low noise level.")
	else:
	print(f"Speech enhancement completed! Enhanced file saved at: {result_path}")