Spaces:
Sleeping
Sleeping
File size: 1,847 Bytes
6c1314b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import base64
import io
import librosa
import numpy as np
import torch
import torchaudio
import soundfile as sf
def decode_audio(base64_string: str) -> io.BytesIO:
    """
    Decode a base64 string into an in-memory, file-like audio object.

    Args:
        base64_string: Base64-encoded audio payload.

    Returns:
        io.BytesIO: Buffer positioned at offset 0 holding the decoded bytes.

    Raises:
        ValueError: If the input cannot be decoded (bad padding, non-ASCII
            text, or a non-string/bytes argument). The underlying error is
            attached as ``__cause__``.
    """
    try:
        audio_data = base64.b64decode(base64_string)
    except (ValueError, TypeError) as e:
        # binascii.Error is a ValueError subclass, so this covers malformed
        # payloads as well as wrong argument types. Chain the cause so the
        # original failure is preserved for debugging.
        raise ValueError(f"Invalid Base64 audio data: {e}") from e
    return io.BytesIO(audio_data)
def load_audio(file_obj, target_sr=16000):
    """
    Read audio from a file-like object and return it as a torch tensor.

    Decoding is done by ``librosa.load`` with ``sr=target_sr``. A
    stationary noise-reduction pass (``noisereduce``) is then attempted;
    if it fails for any reason, a warning is printed and the unfiltered
    signal is used instead.

    Returns:
        waveform (torch.Tensor): Audio samples with a leading dimension
            of size 1 added.
        sr (int): Sample rate reported by ``librosa.load``.
    """
    # librosa handles a wide range of container formats (MP3, etc).
    samples, sample_rate = librosa.load(file_obj, sr=target_sr)

    # Best-effort spectral gating to cut false positives from background
    # noise; the noise profile is estimated from the whole clip.
    try:
        import noisereduce as nr

        samples = nr.reduce_noise(
            y=samples,
            sr=sample_rate,
            stationary=True,
            prop_decrease=0.75,
        )
    except Exception as e:
        print(f"Warning: Noise reduction failed: {e}")

    # Add a leading axis: (time,) -> (1, time).
    waveform = torch.unsqueeze(torch.tensor(samples), 0)
    return waveform, sample_rate
def extract_heuristic_features(y, sr):
    """
    Summarise a signal with a few simple spectral statistics.

    Each feature is computed by librosa and then averaged into a single
    Python float, for use in explainability output.

    Returns:
        dict: Keys ``spectral_centroid``, ``spectral_rolloff`` and
            ``zero_crossing_rate``, each mapped to the mean of that feature.
    """
    # Raw librosa outputs, keyed by the name used in the result.
    feature_frames = {
        "spectral_centroid": librosa.feature.spectral_centroid(y=y, sr=sr),
        "spectral_rolloff": librosa.feature.spectral_rolloff(y=y, sr=sr),
        "zero_crossing_rate": librosa.feature.zero_crossing_rate(y),
    }

    # Collapse every feature to one scalar.
    return {
        label: float(np.mean(frames))
        for label, frames in feature_frames.items()
    }
|