Spaces:

abedir
/

clstm_fastAPI

Sleeping

clstm_fastAPI / audio_utils.py

Upload 7 files

3461076 verified 28 days ago

714 Bytes

	import librosa
	import numpy as np
	import torch
	from config import CONFIG

	def preprocess_audio(path, device):
	    """Load an audio file and return a standardized log-mel tensor.

	    The waveform is loaded at ``CONFIG["sample_rate"]``, forced to exactly
	    ``CONFIG["sample_rate"] * CONFIG["duration"]`` samples (truncated or
	    right-padded with zeros), converted to a mel spectrogram in decibels,
	    and standardized to zero mean and unit variance over the whole
	    spectrogram.

	    Args:
	        path: Audio source handed straight to ``librosa.load``.
	        device: Target passed to ``Tensor.to`` for the returned tensor.

	    Returns:
	        The standardized spectrogram as a torch tensor on ``device``, with
	        two leading singleton dimensions added (presumably batch and
	        channel -- confirm against the model's expected input).
	    """
	    sample_rate = CONFIG["sample_rate"]
	    signal, _ = librosa.load(path, sr=sample_rate)

	    # Fit the waveform to a fixed number of samples.
	    target_len = int(sample_rate * CONFIG["duration"])
	    shortfall = target_len - len(signal)
	    if shortfall < 0:
	        signal = signal[:target_len]
	    else:
	        signal = np.pad(signal, (0, shortfall))

	    mel_kwargs = {
	        "n_fft": CONFIG["n_fft"],
	        "hop_length": CONFIG["hop_length"],
	        "n_mels": CONFIG["n_mels"],
	    }
	    mel_power = librosa.feature.melspectrogram(y=signal, sr=sample_rate, **mel_kwargs)

	    # Decibel scale, referenced to the spectrogram's own maximum.
	    log_mel = librosa.power_to_db(mel_power, ref=np.max)

	    # The small epsilon keeps the division defined when the spectrogram is
	    # constant (standard deviation of zero).
	    centered = log_mel - log_mel.mean()
	    standardized = centered / (log_mel.std() + 1e-9)

	    features = torch.from_numpy(standardized)
	    features = features.unsqueeze(0)
	    features = features.unsqueeze(0)
	    return features.to(device)