# Roketman / app.py
# Commit a098c5a (verified) by opinder2906: "Update app.py" (4.12 kB)
# (Web-viewer navigation text "raw / history blame" was captured with the file.)
import gradio as gr
import tensorflow as tf
import librosa
import numpy as np
from PIL import Image
import requests
from io import BytesIO
# Load the trained Keras model once, at import time, so every prediction
# reuses the same in-memory instance. compile=False loads the model without
# compiling it; this script only ever calls model.predict().
_MODEL_PATH = "animal_sound_cnn.h5"
model = tf.keras.models.load_model(_MODEL_PATH, compile=False)
# Lookup table from the model's output index (the argmax of its prediction
# vector) to a human-readable animal label.
# NOTE(review): the index order presumably mirrors the label encoding used
# when the model was trained - confirm before reordering.
class_names = dict(enumerate((
    "Lion", "Donkey", "Cow", "Cat", "Dog",
    "Sheep", "Frog", "Bird", "Monkey", "Chicken",
)))
# One illustrative photo per animal label, all hosted on Wikimedia Commons.
# The keys are the label strings used as values in class_names, so a
# predicted label can be looked up here directly.
image_urls = dict(
    Lion="https://upload.wikimedia.org/wikipedia/commons/7/73/Lion_waiting_in_Namibia.jpg",
    Donkey="https://upload.wikimedia.org/wikipedia/commons/8/88/Donkey_01.jpg",
    Cow="https://upload.wikimedia.org/wikipedia/commons/0/0c/Cow_female_black_white.jpg",
    Cat="https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",
    Dog="https://upload.wikimedia.org/wikipedia/commons/d/d9/Golden_Retriever_2.jpg",
    Sheep="https://upload.wikimedia.org/wikipedia/commons/3/3a/Sheep_on_Pasture.jpg",
    Frog="https://upload.wikimedia.org/wikipedia/commons/f/f9/Green_Frog_in_Wisconsin.jpg",
    Bird="https://upload.wikimedia.org/wikipedia/commons/f/fd/Passer_domesticus_male.jpg",
    Monkey="https://upload.wikimedia.org/wikipedia/commons/4/4e/Macaque_in_Japan.jpg",
    Chicken="https://upload.wikimedia.org/wikipedia/commons/3/3b/Chicken_Red_Ranger.jpg",
)
# Wikimedia asks HTTP clients to identify themselves; the default
# "python-requests/x.y" agent may be rejected by upload.wikimedia.org, which
# would silently turn every result image into the gray placeholder.
# TODO(owner): append a contact URL or e-mail, as the policy recommends.
_IMAGE_REQUEST_HEADERS = {"User-Agent": "AnimalSoundClassifier/1.0 (Gradio demo)"}


def _placeholder_image():
    """Return the plain gray 300x200 image shown when no photo is available."""
    return Image.new('RGB', (300, 200), color='gray')


def download_image(url):
    """Fetch the image at *url*, falling back to a gray placeholder.

    Args:
        url: HTTP(S) address of the image. An empty (or None) value skips
            the network request entirely.

    Returns:
        A PIL image in RGB mode: either the downloaded picture or a 300x200
        gray placeholder when *url* is empty or the download/decoding fails.
    """
    import logging  # local import: keeps this block self-contained

    if not url:
        return _placeholder_image()
    try:
        response = requests.get(url, headers=_IMAGE_REQUEST_HEADERS, timeout=10)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception:
        # Deliberately broad: the picture is decorative, so a network, HTTP
        # or decoding problem must never break the prediction itself. The
        # failure is logged so persistent gray images can be diagnosed.
        logging.getLogger(__name__).warning(
            "Could not load image from %s; using placeholder", url, exc_info=True
        )
        return _placeholder_image()
def preprocess(audio_path):
    """Turn an audio file into the feature tensor fed to the model.

    The clip is resampled to 22050 Hz and truncated to its first 3 seconds,
    40 MFCC coefficients are computed per frame, and each coefficient is
    averaged over all frames.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        A float32 NumPy array of shape (1, 40, 1, 1).

    Raises:
        ValueError: If the file cannot be loaded, contains no samples, or
            feature extraction fails. The original exception is attached as
            ``__cause__``.
    """
    try:
        y, sr = librosa.load(audio_path, sr=22050, duration=3)  # Limit to 3s
        if y.size == 0:
            # An empty clip cannot produce meaningful features; fail clearly
            # instead of passing degenerate values on to the model.
            raise ValueError("audio clip contains no samples")
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
        # Collapse the time axis: one mean value per MFCC coefficient.
        mfcc_mean = np.mean(mfcc, axis=1)
        return mfcc_mean.reshape(1, 40, 1, 1).astype(np.float32)
    except Exception as e:
        # Normalize every failure to ValueError with a uniform prefix, while
        # keeping the root cause chained for debugging.
        raise ValueError(f"Audio processing error: {e}") from e
def predict(audio_path):
    """Classify the animal heard in an audio clip.

    Args:
        audio_path: Filesystem path of the clip. Empty/None when the user
            pressed the button without uploading or recording anything.

    Returns:
        A ``(text, image)`` tuple for the two output widgets. On success the
        text holds the predicted label and its confidence, and the image is
        the photo for that label (or a gray placeholder when none is
        available). On failure the text starts with ``"Error: "`` and the
        image is the gray placeholder.
    """
    if not audio_path:
        # Give a clear message instead of an opaque audio-loading error.
        return ("Error: No audio provided. Please upload or record a sound.",
                download_image(""))
    try:
        # Preprocess audio
        X = preprocess(audio_path)
        # Make prediction (verbose=0 silences the progress output)
        pred = model.predict(X, verbose=0)
        scores = pred[0]  # single-item batch: scores for the one clip
        # Plain int/float keep the dict lookup and the formatting below
        # independent of NumPy scalar types.
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        class_name = class_names.get(class_id, f"Unknown ({class_id})")
        # An unknown label has no URL; "" makes download_image return the
        # placeholder.
        img = download_image(image_urls.get(class_name, ""))
        # Format output
        text_output = (f"Predicted animal: {class_name}\n"
                       f"Confidence: {confidence*100:.2f}%")
        return text_output, img
    except Exception as e:
        # Top-level UI boundary: report the problem in the text box together
        # with a blank image rather than letting the callback crash.
        return f"Error: {e}", download_image("")
# Build the Gradio UI: an audio input and a button that runs predict(),
# showing the resulting text and image side by side.
with gr.Blocks() as demo:
    gr.Markdown("# 🐾 Animal Sound Classifier")
    gr.Markdown("Upload an animal sound (3-5 seconds) to identify the animal")

    # NOTE(review): the original indentation was lost; the button is assumed
    # to share this row with the audio input - confirm the intended layout.
    with gr.Row():
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",  # predict() receives a path string
            label="Animal Sound",
            max_length=5,  # Limit recording to 5 seconds
        )
        btn = gr.Button("Identify Animal", variant="primary")

    with gr.Row():
        text_output = gr.Textbox(label="Prediction Result", interactive=False)
        image_output = gr.Image(label="Animal Image", height=300)

    btn.click(
        fn=predict,
        inputs=audio_input,
        outputs=[text_output, image_output],
    )

    # cache_examples=True asks Gradio to precompute predict() for these
    # clips, so the files must exist under examples/.
    gr.Examples(
        examples=[
            ["examples/lion_roar.wav"],
            ["examples/dog_bark.wav"],
            ["examples/bird_chirp.wav"],
        ],
        inputs=audio_input,
        outputs=[text_output, image_output],
        fn=predict,
        cache_examples=True,
    )

# Launch with error display enabled
demo.launch(show_error=True)