import importlib
import os
import sys

import joblib
import pandas as pd
import torch
import torchaudio.transforms as T


class BioacousticEngine:
    """Load the embedding model and its companion artifacts, and prepare audio.

    Construction reads everything from ``repo_dir``: the ``cvt`` and
    ``mel_spectrogram`` Python modules, the ``protoclr.pth`` weights, the
    ``trained_cluster_brain.joblib`` bundle and the
    ``acoustic_atlas_metadata.csv`` table.

    Attributes set by ``__init__``:
        device: ``cuda`` when available, otherwise ``cpu``.
        repo_dir: the directory the artifacts were loaded from.
        preprocessor: ``MelSpectrogramProcessor`` built for ``device``.
        model: ``cvt13`` network with loaded weights, on ``device``, in eval mode.
        reducer: the object stored under the ``'umap'`` key of the joblib bundle.
        df: the metadata CSV as a pandas ``DataFrame``.
    """

    # Sample rate every waveform is converted to before further processing.
    TARGET_SAMPLE_RATE = 16000
    # Maximum clip length kept by ``process_waveform``, in seconds.
    CLIP_SECONDS = 3
    # Hop between candidate windows during the energy scan (0.25 s at 16 kHz).
    SCAN_STEP = 4000
    # Peak amplitude that must be exceeded before peak normalization is applied.
    NORMALIZE_FLOOR = 0.02

    def __init__(self, repo_dir: str = "tiny-bird-diffusion"):
        """Load the model, preprocessor, reducer and metadata from ``repo_dir``.

        Args:
            repo_dir: Directory holding the project modules and artifacts.
                Relative paths are resolved against the current working
                directory.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.repo_dir = repo_dir

        # The project modules live inside repo_dir, so it must be importable.
        # Guarded so that building several engines does not pile up duplicates.
        repo_path = os.path.abspath(repo_dir)
        if repo_path not in sys.path:
            sys.path.append(repo_path)

        # These imports only resolve after the sys.path update above.
        from cvt import cvt13
        mel_module = importlib.import_module("mel_spectrogram")

        self.preprocessor = mel_module.MelSpectrogramProcessor(device=self.device)

        # NOTE(security): torch.load and joblib.load both unpickle data, which
        # can execute arbitrary code. Only point repo_dir at trusted files.
        self.model = cvt13()
        state_dict = torch.load(
            os.path.join(repo_dir, "protoclr.pth"), map_location="cpu"
        )
        self.model.load_state_dict(state_dict)
        self.model = self.model.to(self.device).eval()

        brain_data = joblib.load(
            os.path.join(repo_dir, "trained_cluster_brain.joblib")
        )
        self.reducer = brain_data['umap']

        self.df = pd.read_csv(os.path.join(repo_dir, "acoustic_atlas_metadata.csv"))

    @staticmethod
    def _loudest_window_start(waveform: torch.Tensor, window: int, step: int) -> int:
        """Return the start index of the window with the highest mean |amplitude|.

        Candidate windows start at 0, ``step``, ``2 * step``, ... and must fit
        entirely inside the signal. The earliest window wins ties.
        """
        best_start = 0
        # Mean absolute amplitude is never negative, so -1 loses to any window.
        max_energy = -1
        for start in range(0, waveform.shape[-1] - window + 1, step):
            energy = waveform[:, start:start + window].abs().mean().item()
            if energy > max_energy:
                max_energy, best_start = energy, start
        return best_start

    def process_waveform(self, waveform: torch.Tensor, sample_rate: int) -> torch.Tensor:
        """Convert a raw waveform to mono 16 kHz, at most 3 s, peak-normalized.

        Steps, in order:
          1. Promote a 1-D ``(samples,)`` tensor to ``(1, samples)``.
          2. Resample to 16 kHz when ``sample_rate`` differs.
          3. Average the channels when there is more than one.
          4. If longer than 3 s, keep the 3 s window with the highest mean
             absolute amplitude, scanning in 4000-sample hops.
          5. Divide by the peak amplitude when the peak exceeds 0.02
             (presumably so near-silent input is not amplified; confirm).

        Signals shorter than 3 s are returned at their own length; no padding
        is applied here.

        Args:
            waveform: Audio tensor laid out as ``(channels, samples)``, or a
                1-D ``(samples,)`` tensor.
            sample_rate: Sample rate of ``waveform`` in Hz.

        Returns:
            A ``(1, samples)`` tensor on ``self.device``.
        """
        # Without this, the channel check below would read every sample of a
        # 1-D signal as a separate channel and average the audio to one value.
        if waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)

        if sample_rate != self.TARGET_SAMPLE_RATE:
            # Keep the resampling kernel on the same device as the input.
            resampler = T.Resample(
                orig_freq=sample_rate, new_freq=self.TARGET_SAMPLE_RATE
            ).to(waveform.device)
            waveform = resampler(waveform)

        if waveform.shape[0] > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        target_samples = self.CLIP_SECONDS * self.TARGET_SAMPLE_RATE
        if waveform.shape[-1] > target_samples:
            start = self._loudest_window_start(waveform, target_samples, self.SCAN_STEP)
            waveform = waveform[:, start:start + target_samples]

        # Compute the peak once and reuse it for both the test and the division.
        peak = waveform.abs().max()
        if peak > self.NORMALIZE_FLOOR:
            waveform = waveform / peak

        return waveform.to(self.device)