"""Gradio demo that classifies bird-song recordings with a wav2vec2 model."""

import gradio as gr
import numpy as np
from transformers import pipeline

MODEL_ID = "dima806/bird_sounds_classification"
TARGET_SAMPLE_RATE = 16000  # the model expects 16 kHz input
TOP_K = 5  # number of predictions requested and displayed
LOW_CONFIDENCE_THRESHOLD = 0.40  # below this the top prediction gets a warning
BAR_WIDTH = 20  # characters in the text score bar

NO_AUDIO_MESSAGE = "Please upload or record an audio file."
EMPTY_AUDIO_MESSAGE = (
    "The recording is empty or too short to analyze. "
    "Please upload or record a longer clip."
)

classifier = pipeline(
    "audio-classification",
    model=MODEL_ID,
    device=-1,
)

# Get the full species list from the model config
SPECIES_LIST = sorted(set(classifier.model.config.id2label.values()))


def _prepare_waveform(sr, y):
    """Convert raw audio to mono float32 samples at TARGET_SAMPLE_RATE.

    Args:
        sr: Sampling rate of ``y`` in Hz.
        y: Sample array, 1-D (mono) or 2-D with channels on the second axis.

    Returns:
        A 1-D float32 array, or ``None`` when there are no samples to
        classify (empty input, or a clip so short that it resamples to
        zero samples).
    """
    y = np.asarray(y)
    if y.size == 0:
        return None

    # If stereo, take the first channel.
    if y.ndim > 1:
        y = y[:, 0]

    # Convert to float32; signed integer PCM is scaled by 2**(bits-1)
    # (32768 for int16, 2147483648 for int32).
    if np.issubdtype(y.dtype, np.signedinteger):
        full_scale = 2.0 ** (8 * y.dtype.itemsize - 1)
        y = y.astype(np.float32) / full_scale
    elif y.dtype != np.float32:
        y = y.astype(np.float32)

    if sr != TARGET_SAMPLE_RATE:
        # Simple resampling using numpy linear interpolation.
        duration = len(y) / sr
        new_length = int(duration * TARGET_SAMPLE_RATE)
        if new_length == 0:
            return None
        positions = np.linspace(0, len(y) - 1, new_length)
        # np.interp returns float64; cast back so both paths yield float32.
        y = np.interp(positions, np.arange(len(y)), y).astype(np.float32)

    return y


def _format_predictions(results) -> str:
    """Render classifier predictions as a multi-line text report.

    Args:
        results: Iterable of dicts with ``"label"`` and ``"score"`` keys,
            in the order they should be displayed.

    Returns:
        The report text. When the first prediction scores below
        LOW_CONFIDENCE_THRESHOLD, a warning header precedes the list and
        that first entry is shown without a score bar.
    """
    lines = []
    for rank, pred in enumerate(results, 1):
        score = pred["score"]
        label = pred["label"]

        if rank == 1 and score < LOW_CONFIDENCE_THRESHOLD:
            lines.extend(
                [
                    "Not confident - this may not be a recognizable bird song,",
                    "or the species may not be in this model's training data.",
                    f"Best guess: {label} ({score:.0%})",
                    "",
                    f"Top {TOP_K} predictions:",
                    f" 1. {label} - {score:.1%}",
                ]
            )
            continue

        filled = int(score * BAR_WIDTH)
        bar = "#" * filled + "." * (BAR_WIDTH - filled)
        lines.append(f"{rank}. {label}")
        lines.append(f" {bar} {score:.1%}")

    return "\n".join(lines)


def classify_bird(audio):
    """Classify a recording and return a text report of the top predictions.

    Args:
        audio: ``None`` when nothing was provided, otherwise a
            ``(sampling_rate, samples)`` pair as unpacked below (the
            Audio input is configured with ``type="numpy"``).

    Returns:
        A human-readable string: either a prompt to provide (longer) audio
        or the formatted predictions.
    """
    if audio is None:
        return NO_AUDIO_MESSAGE

    sr, y = audio
    waveform = _prepare_waveform(sr, y)
    if waveform is None:
        # Avoids passing an empty array to np.interp / the classifier.
        return EMPTY_AUDIO_MESSAGE

    results = classifier(
        {"sampling_rate": TARGET_SAMPLE_RATE, "raw": waveform},
        top_k=TOP_K,
    )
    return _format_predictions(results)


demo = gr.Interface(
    fn=classify_bird,
    inputs=gr.Audio(
        label="Upload or Record a Bird Song",
        type="numpy",
    ),
    outputs=gr.Textbox(label="Classification Results", lines=12),
    title="Bird Song Classifier",
    description=(
        "Upload a bird song recording and this model will try to identify the species. "
        "Uses dima806/bird_sounds_classification, a wav2vec2-based classifier trained on "
        "50 bird species (mostly Tinamous, Guans, and Chachalacas - neotropical birds). "
        "Best results with clean recordings of 3+ seconds.\n\n"
        "Note: This model was trained on tropical/neotropical species. "
        "It won't recognize common North American backyard birds like cardinals or robins. "
        "That's a training data limitation, not an architecture limitation.\n\n"
        "Try recordings from Xeno-Canto (https://xeno-canto.org/) - search for species like "
        "Great Tinamou, Plain Chachalaca, or Crested Guan."
    ),
    article=(
        "### Species this model knows\n\n"
        + ", ".join(SPECIES_LIST)
        + "\n\n---\n*Riley's Space 2 - AI + Research Level 2*"
    ),
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    demo.launch()