"""
HuggingFace Gradio Space — Bee Audio Classifier
This Space loads the trained gradient-boosting model from the HF Hub repo
and exposes a single /predict API endpoint that accepts a WAV audio file
and returns the hive state classification.
Your FastAPI service POSTs audio bytes here — the model never touches
the FastAPI server.
Deploy this file (plus requirements.txt) as a new HuggingFace Space of
type "Gradio". Set HF_TOKEN in the Space secrets if the model repo is private.
"""
import json
import gradio as gr
import joblib
import librosa
import numpy as np
from huggingface_hub import hf_hub_download
# ---------------------------------------------------------------------------
# Load model + encoder at Space startup (runs on HF's servers, not yours)
# ---------------------------------------------------------------------------
# Hub repository holding the pickled classifier and its label encoder.
REPO_ID = "DerrickLegacy256/bee_swarming_and_absconment"

# Download both artifacts; each call yields the path that is loaded below.
_model_path = hf_hub_download(repo_id=REPO_ID, filename="gradient_boosting_model.pkl")
_encoder_path = hf_hub_download(repo_id=REPO_ID, filename="label_encoder.pkl")

# NOTE(review): joblib.load unpickles these files, which can execute arbitrary
# code -- only point REPO_ID at a repository you trust.
_model, _label_encoder = (
    joblib.load(artifact) for artifact in (_model_path, _encoder_path)
)
print(f"Model loaded from {REPO_ID}")
# ---------------------------------------------------------------------------
# Feature extraction — 171 features, identical to the training pipeline
# ---------------------------------------------------------------------------
def _extract_features(y: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as one row of spectral statistics.

    Values are collected in a fixed order -- MFCC mean/std, MFCC-delta mean,
    chroma mean/std, mel-spectrogram summary, spectral centroid / bandwidth /
    rolloff mean/std, spectral-contrast mean, zero-crossing-rate mean/std,
    RMS mean/std, tonnetz mean -- and the returned row keeps that order.
    With librosa's default of 7 spectral-contrast rows this yields 171 values.

    Args:
        y: Audio samples, passed unchanged to the librosa extractors.
        sr: Sampling rate of ``y``.

    Returns:
        Array of shape ``(1, n_features)``.
    """
    hop = 512
    n_fft = 2048
    stats: dict = {}

    def add_mean(key: str, values) -> None:
        # Record the mean of ``values`` under "<key>_mean".
        stats[f"{key}_mean"] = float(np.mean(values))

    def add_mean_std(key: str, values) -> None:
        # Record mean then standard deviation, in that insertion order.
        add_mean(key, values)
        stats[f"{key}_std"] = float(np.std(values))

    # 40 MFCC coefficients and their deltas.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40, n_fft=n_fft, hop_length=hop)
    for idx in range(40):
        add_mean_std(f"mfcc_{idx}", mfcc[idx])

    mfcc_delta = librosa.feature.delta(mfcc)
    for idx in range(40):
        add_mean(f"mfcc_delta_{idx}", mfcc_delta[idx])

    # 12 chroma bins.
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft, hop_length=hop)
    for idx in range(12):
        add_mean_std(f"chroma_{idx}", chroma[idx])

    # Global summary of the mel spectrogram in dB relative to its peak.
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop),
        ref=np.max,
    )
    for suffix, reducer in (
        ("mean", np.mean),
        ("std", np.std),
        ("max", np.max),
        ("min", np.min),
    ):
        stats[f"mel_{suffix}"] = float(reducer(mel_db))

    # Frame-wise spectral shape descriptors that share one call signature.
    for key, extractor in (
        ("spectral_centroid", librosa.feature.spectral_centroid),
        ("spectral_bandwidth", librosa.feature.spectral_bandwidth),
        ("spectral_rolloff", librosa.feature.spectral_rolloff),
    ):
        add_mean_std(key, extractor(y=y, sr=sr, hop_length=hop))

    # One mean per row returned by spectral_contrast.
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=hop)
    for idx, band in enumerate(contrast):
        add_mean(f"spectral_contrast_{idx}", band)

    add_mean_std("zcr", librosa.feature.zero_crossing_rate(y, hop_length=hop))
    add_mean_std("rms", librosa.feature.rms(y=y, hop_length=hop))

    # Tonnetz is computed on the harmonic component of the signal.
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
    for idx in range(6):
        add_mean(f"tonnetz_{idx}", tonnetz[idx])

    # dict preserves insertion order, so the row layout is the order above.
    return np.fromiter(stats.values(), dtype=float).reshape(1, -1)
# ---------------------------------------------------------------------------
# Prediction function — called by the Gradio interface
# ---------------------------------------------------------------------------
def predict(audio_path: str) -> dict:
    """Classify a hive recording and report per-class probabilities.

    The file is loaded at 22050 Hz, truncated to its first 5 seconds,
    converted to a feature row by ``_extract_features`` and scored by the
    model loaded at startup.

    Args:
        audio_path: Path to the uploaded audio file, as provided by the
            ``gr.Audio(type="filepath")`` input. Gradio passes ``None`` when
            nothing was uploaded.

    Returns:
        ``{"label": str, "score": float, "all_scores": {class_name: float}}``
        where ``score`` is the predicted class's probability.

    Raises:
        gr.Error: If no audio was provided, or the recording is too short
            for feature extraction.
    """
    if not audio_path:
        # Without this guard the None reaches librosa.load and fails opaquely.
        raise gr.Error("No audio received. Please upload a WAV recording.")

    y, sr = librosa.load(audio_path, sr=22050)
    y = y[: int(5.0 * sr)]  # first 5 seconds only (matches training)

    # _extract_features takes MFCC deltas with librosa's default 9-frame
    # window at hop_length=512; fewer than 9 frames (under 8 * 512 samples)
    # makes librosa.feature.delta raise, so reject such clips up front.
    min_samples = 8 * 512
    if y.size < min_samples:
        raise gr.Error(
            f"Recording is too short ({y.size / sr:.2f} s); "
            f"at least {min_samples / sr:.2f} s of audio is required."
        )

    vector = _extract_features(y, sr)
    class_index = _model.predict(vector)[0]
    proba = _model.predict_proba(vector)[0]
    classes = _label_encoder.classes_

    # Convert numpy scalars to plain Python types so the payload is ordinary
    # JSON-serialisable data.
    return {
        "label": str(classes[class_index]),
        "score": float(proba[class_index]),
        "all_scores": {str(cls): float(p) for cls, p in zip(classes, proba)},
    }
# ---------------------------------------------------------------------------
# Gradio interface — exposes /api/predict for programmatic access
# ---------------------------------------------------------------------------
# Input/output components: the audio widget hands predict() a file path and
# the result dict is rendered as JSON.
_audio_input = gr.Audio(type="filepath", label="Hive audio recording")
_json_output = gr.JSON(label="Classification result")

# Text shown in the interface description.
_description = " ".join(
    [
        "Upload a WAV recording from a hive.",
        "Returns: active_colony | swarming | missing_queen | queenbee_present | external_noise",
    ]
)

iface = gr.Interface(
    title="Bee Swarming & Abscondment Audio Classifier",
    description=_description,
    fn=predict,
    inputs=_audio_input,
    outputs=_json_output,
    api_name="predict",
)

# Start the Gradio server as soon as the module is executed.
iface.launch()
|