Spaces:

profplate
/

bird-song-classifier

Paused

App Files Files Community

bird-song-classifier / app.py

profplate

Update app.py

6cc7f13 verified about 2 months ago

raw

history blame contribute delete

3.18 kB

	import gradio as gr
	from transformers import pipeline
	import numpy as np

	# Audio-classification pipeline, built once at import time and shared by
	# every request. device=-1 selects the CPU (see the transformers
	# `pipeline` documentation for the device-ordinal convention).
	classifier = pipeline(
	    task="audio-classification",
	    model="dima806/bird_sounds_classification",
	    device=-1,
	)

	# Alphabetical list of the distinct label names found in the model config;
	# shown to users so they know which species can be predicted.
	SPECIES_LIST = sorted({*classifier.model.config.id2label.values()})

	# Sample rate (Hz) the waveform is converted to before inference.
	_TARGET_SAMPLE_RATE = 16000
	# Top-1 scores below this trigger the "not confident" preamble.
	_LOW_CONFIDENCE_THRESHOLD = 0.40
	# Width, in characters, of the text score bar.
	_BAR_WIDTH = 20


	def _to_mono_float32(y):
	    """Return *y* as a one-dimensional float32 waveform.

	    Multi-channel input keeps only its first channel. Integer PCM is
	    rescaled by its full-scale value (int16 -> /32768, int32 -> /2**31,
	    and likewise for other widths); unsigned PCM is additionally centred
	    on its midpoint. Any other dtype is cast to float32 unchanged.
	    """
	    y = np.asarray(y)
	    if y.ndim > 1:
	        y = y[:, 0]

	    if y.dtype == np.float32:
	        return y
	    if np.issubdtype(y.dtype, np.signedinteger):
	        full_scale = -float(np.iinfo(y.dtype).min)
	        return y.astype(np.float32) / full_scale
	    if np.issubdtype(y.dtype, np.unsignedinteger):
	        midpoint = float(2 ** (8 * y.dtype.itemsize - 1))
	        return (y.astype(np.float32) - midpoint) / midpoint
	    return y.astype(np.float32)


	def _resample_linear(y, sr, target_sr):
	    """Resample *y* from *sr* Hz to *target_sr* Hz by linear interpolation.

	    Returns an empty array when the input is too short to yield even one
	    output sample. NOTE: no anti-aliasing filter is applied.
	    """
	    # Integer arithmetic avoids losing a sample to float rounding.
	    new_length = (len(y) * int(target_sr)) // int(sr)
	    if new_length == 0:
	        return y[:0]
	    positions = np.linspace(0, len(y) - 1, new_length)
	    resampled = np.interp(positions, np.arange(len(y)), y)
	    # np.interp yields float64; keep the float32 contract.
	    return resampled.astype(np.float32)


	def _format_predictions(results):
	    """Render pipeline predictions as the multi-line text shown in the UI."""
	    lines = []
	    for rank, pred in enumerate(results, 1):
	        score = pred["score"]
	        label = pred["label"]

	        if rank == 1 and score < _LOW_CONFIDENCE_THRESHOLD:
	            # Warn up front when even the best guess is weak.
	            lines.extend(
	                [
	                    "Not confident - this may not be a recognizable bird song,",
	                    "or the species may not be in this model's training data.",
	                    f"Best guess: {label} ({score:.0%})",
	                    "",
	                    "Top 5 predictions:",
	                    f" 1. {label} - {score:.1%}",
	                ]
	            )
	            continue

	        filled = int(score * _BAR_WIDTH)
	        bar = "#" * filled + "." * (_BAR_WIDTH - filled)
	        lines.append(f"{rank}. {label}")
	        lines.append(f" {bar} {score:.1%}")

	    return "\n".join(lines)


	def classify_bird(audio):
	    """Classify a recording and return a human-readable ranking.

	    Args:
	        audio: ``None`` or a ``(sample_rate, samples)`` pair, where
	            ``samples`` is a NumPy array (one- or two-dimensional).

	    Returns:
	        A string: a prompt when no audio was supplied, a notice when the
	        recording is empty or too short, otherwise the formatted top-5
	        predictions from the module-level ``classifier``.
	    """
	    if audio is None:
	        return "Please upload or record an audio file."

	    sr, y = audio
	    y = _to_mono_float32(y)

	    # The model expects 16 kHz input.
	    if sr != _TARGET_SAMPLE_RATE:
	        y = _resample_linear(y, sr, _TARGET_SAMPLE_RATE)
	        sr = _TARGET_SAMPLE_RATE

	    # Never hand the model a zero-length waveform.
	    if len(y) == 0:
	        return (
	            "The recording is empty or too short to analyze. "
	            "Please try a longer clip."
	        )

	    results = classifier({"sampling_rate": sr, "raw": y}, top_k=5)
	    return _format_predictions(results)


	# Gradio UI: one audio input (delivered to classify_bird as a
	# (sample_rate, numpy_array) pair because type="numpy") and one text output.
	demo = gr.Interface(
	    fn=classify_bird,
	    inputs=gr.Audio(
	        label="Upload or Record a Bird Song",
	        type="numpy",
	    ),
	    outputs=gr.Textbox(label="Classification Results", lines=12),
	    title="Bird Song Classifier",
	    description=(
	        "Upload a bird song recording and this model will try to identify the species. "
	        "Uses dima806/bird_sounds_classification, a wav2vec2-based classifier trained on "
	        "50 bird species (mostly Tinamous, Guans, and Chachalacas - neotropical birds). "
	        "Best results with clean recordings of 3+ seconds.\n\n"
	        "Note: This model was trained on tropical/neotropical species. "
	        "It won't recognize common North American backyard birds like cardinals or robins. "
	        "That's a training data limitation, not an architecture limitation.\n\n"
	        "Try recordings from Xeno-Canto (https://xeno-canto.org/) - search for species like "
	        "Great Tinamou, Plain Chachalaca, or Crested Guan."
	    ),
	    # Footer lists every label the model can emit, taken from SPECIES_LIST.
	    article=(
	        "### Species this model knows\n\n"
	        + ", ".join(SPECIES_LIST)
	        + "\n\n---\nRiley's Space 2 - AI + Research Level 2"
	    ),
	    theme=gr.themes.Soft(),
	)

	# Start the web server only when run as a script, so importing this module
	# (for tests or tooling) does not block on a running server.
	if __name__ == "__main__":
	    demo.launch()