import base64
import io

import librosa
import numpy as np
import soundfile as sf
import torch
import torchaudio


def decode_audio(base64_string: str) -> io.BytesIO:
    """
    Decode a base64 string into an in-memory, file-like audio object.

    Whitespace (e.g. the line breaks of MIME-wrapped base64) is removed
    before decoding. Any other character outside the standard base64
    alphabet is rejected instead of being silently dropped, so malformed
    payloads fail here rather than producing corrupted audio bytes.

    Args:
        base64_string: Base64-encoded audio payload (str or bytes-like).

    Returns:
        io.BytesIO positioned at the start of the decoded bytes.

    Raises:
        ValueError: If the input is not valid base64 or has an
            unsupported type.
    """
    try:
        if isinstance(base64_string, str):
            compact = "".join(base64_string.split())
        else:
            # memoryview() raises TypeError for non bytes-like input (None, int, ...).
            compact = b"".join(bytes(memoryview(base64_string)).split())
        # validate=True: without it b64decode discards non-alphabet
        # characters and happily returns garbage for malformed input.
        audio_data = base64.b64decode(compact, validate=True)
    except (ValueError, TypeError) as e:
        # binascii.Error is a ValueError subclass, so it is covered here.
        raise ValueError(f"Invalid Base64 audio data: {e}") from e
    return io.BytesIO(audio_data)


def load_audio(file_obj, target_sr=16000):
    """
    Load audio from a file object, denoise it, and return it as a tensor.

    Args:
        file_obj: Path or file-like object accepted by ``librosa.load``.
        target_sr: Sample rate passed to ``librosa.load`` as ``sr``.

    Returns:
        waveform (torch.Tensor): Audio waveform with a leading dimension
            of size 1 added via ``unsqueeze(0)``.
        sr (int): Sample rate reported by ``librosa.load``.
    """
    # Load using librosa for robust format handling (MP3, etc)
    y, sr = librosa.load(file_obj, sr=target_sr)

    # Noise reduction (basic spectral gating) to reduce false positives from
    # background noise. This step is deliberately best-effort: if the optional
    # `noisereduce` package is missing or the call fails, the un-denoised
    # signal is used and a warning is printed.
    try:
        import noisereduce as nr

        # Assume noise is estimated from the whole clip (stationary)
        y = nr.reduce_noise(y=y, sr=sr, stationary=True, prop_decrease=0.75)
    except Exception as e:
        print(f"Warning: Noise reduction failed: {e}")

    # Convert to tensor
    waveform = torch.tensor(y).unsqueeze(0)  # (1, time)

    return waveform, sr


def extract_heuristic_features(y, sr) -> dict:
    """
    Extract simple spectral features for explainability.

    Each feature is computed with librosa and averaged with ``np.mean``
    into a single float.

    Args:
        y: Audio signal passed to the librosa feature functions.
        sr: Sample rate of ``y``.

    Returns:
        dict with float values under the keys ``"spectral_centroid"``,
        ``"spectral_rolloff"`` and ``"zero_crossing_rate"``.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)

    return {
        "spectral_centroid": float(np.mean(centroid)),
        "spectral_rolloff": float(np.mean(rolloff)),
        "zero_crossing_rate": float(np.mean(zcr)),
    }