Spaces:

kepsmiling121
/

ljsjdwe

Sleeping

App Files Files Community

ljsjdwe / utils /audio_processor.py

kepsmiling121

Create utils/audio_processor.py

8bd48f7 verified 25 days ago

raw

history blame contribute delete

2.84 kB

	"""
	Audio processing utilities
	"""
	import numpy as np
	import librosa
	import soundfile as sf
	from typing import Tuple, Optional
	import logging

	# Module-level logger named after this module; AudioProcessor.load_audio and
	# AudioProcessor.save_audio report failures (and successful saves) through it.
	logger = logging.getLogger(__name__)

	class AudioProcessor:
	    """Audio helpers built on librosa, soundfile and NumPy.

	    Every helper that needs a sample rate (loading, pitch shifting, tempo
	    estimation, fades, and saving when no rate is passed) uses
	    ``self.sample_rate``.
	    """

	    def __init__(self):
	        self.sample_rate = 16000  # Default sample rate for models

	    def load_audio(self, file_path: str) -> Tuple[np.ndarray, int]:
	        """Load an audio file, asking librosa to resample to ``self.sample_rate``.

	        Args:
	            file_path: Path passed straight to ``librosa.load``.

	        Returns:
	            The ``(audio, sample_rate)`` pair produced by ``librosa.load``.

	        Raises:
	            Exception: Any error from ``librosa.load`` is logged and re-raised
	                unchanged.
	        """
	        try:
	            audio, sr = librosa.load(file_path, sr=self.sample_rate)
	            return audio, sr
	        except Exception as e:
	            logger.error(f"Failed to load audio: {str(e)}")
	            raise

	    def save_audio(
	        self,
	        audio_array: np.ndarray,
	        output_path: str,
	        sample_rate: Optional[int] = None,
	    ) -> None:
	        """Write an audio array to ``output_path`` via ``soundfile.write``.

	        Args:
	            audio_array: Samples to write.
	            output_path: Destination file path.
	            sample_rate: Rate to store in the file. Any falsy value (``None``
	                or ``0``) falls back to ``self.sample_rate``.

	        Raises:
	            Exception: Any error from ``soundfile.write`` is logged and
	                re-raised unchanged.
	        """
	        try:
	            sr = sample_rate or self.sample_rate
	            sf.write(output_path, audio_array, sr)
	            logger.info(f"Audio saved to {output_path}")
	        except Exception as e:
	            logger.error(f"Failed to save audio: {str(e)}")
	            raise

	    def normalize_audio(self, audio: np.ndarray) -> np.ndarray:
	        """Scale audio so that its largest absolute sample becomes 1.

	        Args:
	            audio: Samples to normalize.

	        Returns:
	            A new array equal to ``audio`` divided by its peak magnitude.
	            Empty or all-zero input is returned unscaled (as a new array)
	            instead of dividing by zero and producing NaNs.
	        """
	        audio = np.asarray(audio)
	        # np.max raises on an empty array, so treat "no samples" as peak 0.
	        peak = np.max(np.abs(audio)) if audio.size else 0
	        if peak == 0:
	            # Dividing by 1.0 keeps the result dtype consistent with the
	            # normal path while leaving the (silent/empty) samples untouched.
	            return audio / 1.0
	        return audio / peak

	    def trim_silence(self, audio: np.ndarray, threshold: float = 0.01) -> np.ndarray:
	        """Remove leading and trailing silence with ``librosa.effects.trim``.

	        Args:
	            audio: Samples to trim.
	            threshold: Currently unused; trimming always runs with
	                ``top_db=20``. The parameter is kept so existing callers that
	                pass it keep working.

	        Returns:
	            The trimmed signal (first element of librosa's return value).
	        """
	        return librosa.effects.trim(audio, top_db=20, frame_length=512, hop_length=256)[0]

	    def change_speed(self, audio: np.ndarray, speed_factor: float) -> np.ndarray:
	        """Time-stretch audio with ``librosa.effects.time_stretch``.

	        Args:
	            audio: Samples to stretch.
	            speed_factor: Passed to librosa as ``rate``.

	        Returns:
	            The time-stretched samples.
	        """
	        return librosa.effects.time_stretch(audio, rate=speed_factor)

	    def change_pitch(self, audio: np.ndarray, n_steps: float) -> np.ndarray:
	        """Shift pitch with ``librosa.effects.pitch_shift``.

	        Args:
	            audio: Samples to shift; ``self.sample_rate`` is passed as their
	                sample rate.
	            n_steps: Number of steps (semitones) to shift by.

	        Returns:
	            The pitch-shifted samples.
	        """
	        return librosa.effects.pitch_shift(audio, sr=self.sample_rate, n_steps=n_steps)

	    def get_spectrogram(self, audio: np.ndarray) -> np.ndarray:
	        """Return ``librosa.stft(audio)`` computed with librosa's defaults.

	        NOTE(review): per the librosa docs the STFT is complex-valued; callers
	        that want a magnitude spectrogram presumably need ``np.abs`` on the
	        result -- confirm against the call sites.
	        """
	        return librosa.stft(audio)

	    def get_tempo(self, audio: np.ndarray) -> float:
	        """Estimate tempo (BPM) with ``librosa.beat.beat_track``.

	        Args:
	            audio: Samples to analyse; ``self.sample_rate`` is passed as their
	                sample rate.

	        Returns:
	            The estimated tempo as a plain Python ``float``.
	        """
	        tempo, _ = librosa.beat.beat_track(y=audio, sr=self.sample_rate)
	        # Depending on the librosa version, tempo is a scalar or an ndarray;
	        # collapse either form to a float so the annotation holds. For
	        # array-valued results the first entry is used.
	        return float(np.ravel(tempo)[0])

	    def apply_fade(self, audio: np.ndarray, fade_in: float = 0.1, fade_out: float = 0.1) -> np.ndarray:
	        """Apply linear fade-in and fade-out ramps along the first axis.

	        Args:
	            audio: Samples to fade. The input array is not modified.
	            fade_in: Fade-in length in seconds (converted to samples with
	                ``self.sample_rate``); values that round down to 0 or are
	                negative disable it.
	            fade_out: Fade-out length in seconds, handled like ``fade_in``.

	        Returns:
	            A new array with the fades applied. Floating-point input keeps its
	            dtype; other dtypes are converted to float64 so the fractional
	            gains can be applied.
	        """
	        faded = np.asarray(audio)
	        if np.issubdtype(faded.dtype, np.inexact):
	            # Work on a copy so the caller's buffer is left untouched.
	            faded = faded.copy()
	        else:
	            # In-place multiplication by a float ramp is not allowed on
	            # integer arrays, so promote them first (astype also copies).
	            faded = faded.astype(np.float64)

	        total_samples = faded.shape[0]
	        # Clamp each fade to the clip length; a ramp longer than the audio
	        # would otherwise fail to broadcast against the shorter slice.
	        fade_in_samples = min(int(fade_in * self.sample_rate), total_samples)
	        fade_out_samples = min(int(fade_out * self.sample_rate), total_samples)

	        if fade_in_samples > 0:
	            faded[:fade_in_samples] *= np.linspace(0, 1, fade_in_samples)

	        if fade_out_samples > 0:
	            faded[-fade_out_samples:] *= np.linspace(1, 0, fade_out_samples)

	        return faded