Spaces:
Paused
Paused
| import numpy as np | |
| import librosa | |
| from scipy import signal | |
| from .slicer2 import Slicer | |
| class Preprocessor: | |
| def __init__( | |
| self, sr: int, max_slice_length: float = 3.0, min_slice_length: float = 0.5 | |
| ): | |
| self.slicer = Slicer( | |
| sr=sr, | |
| threshold=-42, | |
| min_length=1500, | |
| min_interval=400, | |
| hop_size=15, | |
| max_sil_kept=500, | |
| ) | |
| self.sr = sr | |
| self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr) | |
| self.max_slice_length = max_slice_length | |
| self.min_slice_length = min_slice_length | |
| self.overlap = 0.3 | |
| self.tail = self.max_slice_length + self.overlap | |
| self.max = 0.9 | |
| self.alpha = 0.75 | |
| def norm(self, samples: np.ndarray) -> np.ndarray: | |
| sample_max = np.abs(samples).max() | |
| normalized = samples / sample_max * self.max | |
| normalized = (normalized * self.alpha) + (samples * (1 - self.alpha)) | |
| return normalized | |
| def preprocess_audio(self, y: np.ndarray) -> list[np.ndarray]: | |
| y = signal.filtfilt(self.bh, self.ah, y) | |
| audios = [] | |
| for audio in self.slicer.slice(y): | |
| i = 0 | |
| while True: | |
| start = int(self.sr * (self.max_slice_length - self.overlap) * i) | |
| i += 1 | |
| if len(audio[start:]) > self.tail * self.sr: | |
| slice = audio[start : start + int(self.max_slice_length * self.sr)] | |
| audios.append(self.norm(slice)) | |
| else: | |
| slice = audio[start:] | |
| if len(slice) > self.min_slice_length * self.sr: | |
| audios.append(self.norm(slice)) | |
| break | |
| return audios | |
| def preprocess_file(self, file_path: str) -> list[np.ndarray]: | |
| y, _ = librosa.load(file_path, sr=self.sr) | |
| return self.preprocess_audio(y) | |