| import gradio as gr |
| from transformers import pipeline |
| import numpy as np |
|
|
# Audio-classification pipeline, constructed once when the module is loaded.
# device=-1 is the transformers pipeline setting for running on CPU.
classifier = pipeline(
    task="audio-classification",
    model="dima806/bird_sounds_classification",
    device=-1,
)

# De-duplicated, alphabetically sorted label names taken from the model
# config's id -> label mapping.
SPECIES_LIST = sorted({*classifier.model.config.id2label.values()})
|
|
# Sampling rate (Hz) every clip is converted to before it is classified.
_TARGET_SAMPLE_RATE = 16000
# Top-1 scores below this value trigger the "not confident" preamble.
_LOW_CONFIDENCE_THRESHOLD = 0.40
# Width, in characters, of the text score bar.
_BAR_WIDTH = 20
# Number of predictions requested from the classifier.
_TOP_K = 5

_NO_INPUT_MESSAGE = "Please upload or record an audio file."
_EMPTY_AUDIO_MESSAGE = (
    "The recording contains no audio. Please upload or record an audio file."
)


def _to_mono_float32(samples):
    """Return *samples* as a float32 array, keeping only the first channel."""
    if samples.ndim > 1:
        samples = samples[:, 0]
    if np.issubdtype(samples.dtype, np.signedinteger):
        # Scale signed PCM by its full-scale magnitude (32768 for int16,
        # 2147483648 for int32) so values land in [-1.0, 1.0).
        full_scale = -float(np.iinfo(samples.dtype).min)
        return samples.astype(np.float32) / full_scale
    return samples.astype(np.float32, copy=False)


def _resample_linear(samples, source_rate, target_rate):
    """Resample non-empty 1-D *samples* to *target_rate* by linear interpolation."""
    if source_rate == target_rate:
        return samples
    new_length = int(len(samples) / source_rate * target_rate)
    if new_length == 0:
        # The clip is shorter than one output sample.
        return np.zeros(0, dtype=np.float32)
    positions = np.linspace(0, len(samples) - 1, new_length)
    resampled = np.interp(positions, np.arange(len(samples)), samples)
    # np.interp yields float64; go back to float32 to match the other path.
    return resampled.astype(np.float32)


def _format_predictions(predictions):
    """Render ranked ``{"label", "score"}`` predictions as display text."""
    lines = []
    for rank, pred in enumerate(predictions, 1):
        score = pred["score"]
        label = pred["label"]

        if rank == 1 and score < _LOW_CONFIDENCE_THRESHOLD:
            # Weak top prediction: warn first, then list it in compact form.
            lines.extend(
                [
                    "Not confident - this may not be a recognizable bird song,",
                    "or the species may not be in this model's training data.",
                    f"Best guess: {label} ({score:.0%})",
                    "",
                    f"Top {_TOP_K} predictions:",
                    f" 1. {label} - {score:.1%}",
                ]
            )
            continue

        filled = int(score * _BAR_WIDTH)
        bar = "#" * filled + "." * (_BAR_WIDTH - filled)
        lines.append(f"{rank}. {label}")
        lines.append(f" {bar} {score:.1%}")

    return "\n".join(lines)


def classify_bird(audio) -> str:
    """Classify a recording and return the top predictions as text.

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` pair, where
            ``samples`` is a NumPy array. For arrays with more than one
            dimension only index 0 of the second axis is used.

    Returns:
        A multi-line string with one entry per prediction, preceded by a
        warning when the best score is below the confidence threshold.
        A short prompt is returned instead when ``audio`` is ``None`` or
        holds no samples (before or after resampling).
    """
    if audio is None:
        return _NO_INPUT_MESSAGE

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        # Interpolating or classifying an empty signal would raise.
        return _EMPTY_AUDIO_MESSAGE

    y = _resample_linear(_to_mono_float32(y), sr, _TARGET_SAMPLE_RATE)
    if y.size == 0:
        return _EMPTY_AUDIO_MESSAGE

    results = classifier(
        {"sampling_rate": _TARGET_SAMPLE_RATE, "raw": y},
        top_k=_TOP_K,
    )
    return _format_predictions(results)
|
|
|
|
# Gradio front end: one audio input and one text output wired to classify_bird.
demo = gr.Interface(
    fn=classify_bird,
    inputs=gr.Audio(type="numpy", label="Upload or Record a Bird Song"),
    outputs=gr.Textbox(lines=12, label="Classification Results"),
    title="Bird Song Classifier",
    # Three paragraphs separated by blank lines.
    description="\n\n".join(
        (
            "Upload a bird song recording and this model will try to identify the species. "
            "Uses dima806/bird_sounds_classification, a wav2vec2-based classifier trained on "
            "50 bird species (mostly Tinamous, Guans, and Chachalacas - neotropical birds). "
            "Best results with clean recordings of 3+ seconds.",
            "Note: This model was trained on tropical/neotropical species. "
            "It won't recognize common North American backyard birds like cardinals or robins. "
            "That's a training data limitation, not an architecture limitation.",
            "Try recordings from Xeno-Canto (https://xeno-canto.org/) - search for species like "
            "Great Tinamou, Plain Chachalaca, or Crested Guan.",
        )
    ),
    # Markdown footer listing every label in SPECIES_LIST.
    article=(
        f"### Species this model knows\n\n{', '.join(SPECIES_LIST)}"
        "\n\n---\n*Riley's Space 2 - AI + Research Level 2*"
    ),
    theme=gr.themes.Soft(),
)

# NOTE(review): this is a top-level statement, so the app starts whenever the
# module is executed or imported - confirm that is intended.
demo.launch()