import gradio as gr
import numpy as np
from transformers import pipeline

# ─── LOAD MODEL ───────────────────────────────────────────
# Built once at import time so every streamed chunk reuses the same model.
classifier = pipeline(
    "audio-classification",
    model="MIT/ast-finetuned-audioset-10-10-0.4593",
)

# ─── THREAT MAPPING ───────────────────────────────────────
# Ordered (keywords, message) rules: the first rule with any keyword that
# occurs as a substring of the lower-cased label wins.
# NOTE(review): matching is by substring, so short keywords such as "car"
# also hit unrelated labels that merely contain those letters — confirm
# whether whole-word matching is wanted.
_THREAT_RULES = (
    (("chainsaw", "engine"), "🪚 Chainsaw detected"),
    (("gunshot", "bang", "explosion"), "🔫 Gunshot detected"),
    (("vehicle", "car", "truck"), "🚗 Vehicle detected"),
    (("speech", "talk", "shout"), "👤 Human detected"),
)

_WAITING_MESSAGE = "🎤 Waiting for audio..."


def map_label(label: str, score: float) -> str:
    """Translate a classifier label into a human-readable alert string.

    Args:
        label: Class label produced by the classifier (any casing).
        score: Confidence for that label; shown rounded to two decimals.

    Returns:
        A threat message when the label matches one of ``_THREAT_RULES``,
        otherwise a "Safe" message that echoes the lower-cased label.
    """
    label = label.lower()
    shown_score = round(score, 2)
    for keywords, message in _THREAT_RULES:
        if any(keyword in label for keyword in keywords):
            return f"{message} ({shown_score})"
    return f"🌿 Safe: {label} ({shown_score})"


# ─── PREDICTION ───────────────────────────────────────────
def predict(audio) -> str:
    """Classify one microphone chunk and return the mapped alert text.

    Args:
        audio: ``None`` when nothing has been recorded yet, otherwise a
            ``(sampling_rate, samples)`` pair. ``samples`` may be 1-D
            (mono) or 2-D with channels on axis 1.

    Returns:
        The alert string for the top-scoring label, or a waiting message
        when there is no audio to classify.
    """
    if audio is None:
        return _WAITING_MESSAGE

    sr, data = audio
    data = np.asarray(data)
    if data.size == 0:
        # An empty streaming chunk carries nothing to classify.
        return _WAITING_MESSAGE

    # Remember the incoming dtype: the mono mix below promotes to float and
    # would hide whether the samples were integer PCM.
    source_dtype = data.dtype

    # Convert stereo → mono
    if data.ndim > 1:
        data = data.mean(axis=1)

    data = data.astype(np.float32)

    # Integer PCM (typically int16 from the microphone — TODO confirm for
    # the deployed Gradio version) must be rescaled to [-1, 1]; a bare cast
    # leaves amplitudes in the tens of thousands.
    if np.issubdtype(source_dtype, np.signedinteger):
        data /= -float(np.iinfo(source_dtype).min)

    results = classifier({"raw": data, "sampling_rate": sr})
    top = results[0]

    # 🔥 APPLY MAPPING HERE
    return map_label(top["label"], top["score"])


# ─── UI ───────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("## 🌲 Forest Guardian - Live Audio Detection")

    mic = gr.Audio(
        sources=["microphone"],
        streaming=True,  # 🔥 LIVE STREAMING
        type="numpy",
    )
    output = gr.Textbox(label="Detection Result")

    # Each streamed microphone chunk is passed to predict() and the result
    # replaces the textbox contents.
    mic.stream(fn=predict, inputs=mic, outputs=output)

# ─── RUN ──────────────────────────────────────────────────
demo.launch()