Spaces:

amritn8
/

Roketman

Sleeping

App Files Files Community

Roketman / app.py

opinder2906

Update app.py

a098c5a verified 8 months ago

raw

history blame

4.12 kB

	import gradio as gr
	import tensorflow as tf
	import librosa
	import numpy as np
	from PIL import Image
	import requests
	from io import BytesIO

	# Load the trained Keras model once, at import time; predict() reuses this
	# module-level object on every call (nothing is cached beyond that).
	# compile=False loads the model without compiling it -- in this file it is
	# only ever used through model.predict().
	model = tf.keras.models.load_model("animal_sound_cnn.h5", compile=False)

	# Maps the index picked by np.argmax() in predict() to the animal label
	# shown to the user. The position in the tuple is the class index.
	class_names = dict(enumerate((
	    "Lion", "Donkey", "Cow", "Cat", "Dog",
	    "Sheep", "Frog", "Bird", "Monkey", "Chicken",
	)))

	# Illustration shown next to each prediction: animal label -> image URL.
	# Keys must match the values of class_names, because predict() looks the
	# URL up by the predicted label; a label with no entry here ends up with the
	# gray placeholder produced by download_image().
	image_urls = {
	"Lion": "https://upload.wikimedia.org/wikipedia/commons/7/73/Lion_waiting_in_Namibia.jpg",
	"Donkey": "https://upload.wikimedia.org/wikipedia/commons/8/88/Donkey_01.jpg",
	"Cow": "https://upload.wikimedia.org/wikipedia/commons/0/0c/Cow_female_black_white.jpg",
	"Cat": "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",
	"Dog": "https://upload.wikimedia.org/wikipedia/commons/d/d9/Golden_Retriever_2.jpg",
	"Sheep": "https://upload.wikimedia.org/wikipedia/commons/3/3a/Sheep_on_Pasture.jpg",
	"Frog": "https://upload.wikimedia.org/wikipedia/commons/f/f9/Green_Frog_in_Wisconsin.jpg",
	"Bird": "https://upload.wikimedia.org/wikipedia/commons/f/fd/Passer_domesticus_male.jpg",
	"Monkey": "https://upload.wikimedia.org/wikipedia/commons/4/4e/Macaque_in_Japan.jpg",
	"Chicken": "https://upload.wikimedia.org/wikipedia/commons/3/3b/Chicken_Red_Ranger.jpg"
	}

	def download_image(url):
	    """Fetch the image at ``url`` and return it as an RGB PIL image.

	    This is best effort: when ``url`` is empty, or when the request or the
	    decoding fails for any reason, a plain gray 300x200 placeholder is
	    returned instead of raising.
	    """

	    def _placeholder():
	        # Stand-in picture used whenever no real image can be produced.
	        return Image.new('RGB', (300, 200), color='gray')

	    if not url:
	        return _placeholder()

	    try:
	        reply = requests.get(url, timeout=10)
	        reply.raise_for_status()  # turn HTTP error statuses into exceptions
	        picture = Image.open(BytesIO(reply.content)).convert("RGB")
	    except Exception:
	        # Deliberately broad: a missing illustration must never break a
	        # prediction, so every failure degrades to the placeholder.
	        return _placeholder()
	    else:
	        return picture


	def preprocess(audio_path):
	    """Turn an audio file into the feature tensor passed to the model.

	    The clip is loaded at 22050 Hz (only the first 3 seconds are read),
	    40 MFCC coefficients are computed, and each coefficient is averaged
	    across axis 1 of the MFCC matrix, leaving one value per coefficient.

	    Args:
	        audio_path: Path of the audio file to load.

	    Returns:
	        A ``float32`` NumPy array of shape ``(1, 40, 1, 1)``.

	    Raises:
	        ValueError: If ``audio_path`` is missing/empty, or if loading or
	            feature extraction fails (the original error is chained as
	            the cause).
	    """
	    # A missing path (e.g. the button was pressed before any audio was
	    # supplied) gets a clear message instead of an obscure loader error.
	    if not audio_path:
	        raise ValueError("No audio provided. Please upload or record a sound.")

	    try:
	        y, sr = librosa.load(audio_path, sr=22050, duration=3)  # Limit to 3s
	        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
	        mfcc_mean = np.mean(mfcc, axis=1)  # one mean value per coefficient
	        return mfcc_mean.reshape(1, 40, 1, 1).astype(np.float32)
	    except Exception as e:
	        # Keep the single ValueError contract for callers, but chain the
	        # cause so the real failure stays visible in tracebacks.
	        raise ValueError(f"Audio processing error: {e}") from e

	def predict(audio_path):
	    """Classify one audio clip and return ``(result_text, image)``.

	    On success the text names the predicted animal and its confidence as a
	    percentage, and the image is the illustration for that animal. Any
	    exception raised along the way is reported as an ``"Error: ..."`` string
	    paired with the placeholder image, so a (text, image) pair is always
	    returned.
	    """
	    try:
	        features = preprocess(audio_path)

	        # verbose=0 silences Keras' per-call progress output.
	        scores = model.predict(features, verbose=0)
	        class_id = np.argmax(scores)
	        confidence = scores[0][class_id]
	        class_name = class_names.get(class_id, f"Unknown ({class_id})")

	        # Labels without a known URL yield "" and therefore the placeholder.
	        picture = download_image(image_urls.get(class_name, ""))

	        summary = f"Predicted animal: {class_name}\nConfidence: {confidence*100:.2f}%"
	        return summary, picture
	    except Exception as e:
	        # Surface the failure in the text box together with a blank image.
	        return f"Error: {str(e)}", download_image("")

	# Build the UI: an audio input, a trigger button, and a text + image result.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🐾 Animal Sound Classifier")
	    gr.Markdown("Upload an animal sound (3-5 seconds) to identify the animal")

	    with gr.Row():
	        audio_input = gr.Audio(
	            label="Animal Sound",
	            type="filepath",  # predict() receives a path on disk
	            sources=["upload", "microphone"],
	            max_length=5,  # cap accepted audio at 5 seconds
	        )

	    btn = gr.Button("Identify Animal", variant="primary")

	    with gr.Row():
	        text_output = gr.Textbox(label="Prediction Result", interactive=False)
	        image_output = gr.Image(label="Animal Image", height=300)

	    # Clicking the button runs the classifier on the current audio.
	    btn.click(fn=predict, inputs=audio_input, outputs=[text_output, image_output])

	    # Bundled sample clips; their predictions are cached by Gradio.
	    gr.Examples(
	        examples=[
	            [clip]
	            for clip in (
	                "examples/lion_roar.wav",
	                "examples/dog_bark.wav",
	                "examples/bird_chirp.wav",
	            )
	        ],
	        fn=predict,
	        inputs=audio_input,
	        outputs=[text_output, image_output],
	        cache_examples=True,
	    )

	# Start the app; show_error=True makes errors visible in the browser UI.
	demo.launch(show_error=True)