# Hosting-page residue kept for provenance (not part of the program):
# amritn8's picture
# Update app.py
# 2843631 verified
import tensorflow as tf
import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile
# Training artifacts. Both are relative paths, so they are resolved against
# the current working directory of the process.
MODEL_PATH = "animal_sound_cnn.keras"
LABEL_ENCODER_PATH = "label_encoder.joblib"

# Loaded once at import time so every prediction request reuses the same
# objects. NOTE: joblib.load unpickles its input; only point it at trusted files.
model = tf.keras.models.load_model(MODEL_PATH)
label_encoder = joblib.load(LABEL_ENCODER_PATH)
def preprocess_audio(audio_path, *, feature_len=384):
    """Turn a WAV file into the fixed-length feature row fed to the model.

    The waveform is read with ``scipy.io.wavfile``, down-mixed to mono,
    peak-normalised, converted to an STFT magnitude spectrogram, flattened,
    and finally zero-padded or truncated to ``feature_len`` values.

    Args:
        audio_path: Path to a WAV file readable by ``wavfile.read``.
        feature_len: Number of values in the returned row. Defaults to 384,
            the width the original code hard-coded for the model input.

    Returns:
        A NumPy array of shape ``(1, feature_len)``, or ``None`` when the
        file cannot be read or processed (the error is printed).
    """
    try:
        # 1. Load audio file (convert to mono if stereo)
        _sample_rate, samples = wavfile.read(audio_path)
        if samples.ndim > 1:
            samples = np.mean(samples, axis=1)

        # Cast BEFORE taking the absolute value: np.abs on int16 overflows
        # for -32768, which would yield a wrong (even negative) peak.
        samples = samples.astype(np.float32)
        if samples.size == 0:
            raise ValueError("audio file contains no samples")

        peak = np.max(np.abs(samples))
        if peak > 0:
            # Keep the divisor float32 so the signal is not promoted to float64.
            samples = samples / np.float32(peak)
        # A silent clip (peak == 0) is left as all zeros instead of being
        # divided by zero, which would fill the features with NaN.

        # 2. Create spectrogram (adjust these parameters to match your training)
        spectrogram = tf.signal.stft(
            samples, frame_length=256, frame_step=128, fft_length=256
        )
        spectrogram = tf.abs(spectrogram)  # Magnitude

        # 3. Flatten frame by frame, then force the row to feature_len values
        flattened = tf.reshape(spectrogram, (1, -1))
        current_len = flattened.shape[1]
        if current_len < feature_len:
            flattened = tf.pad(flattened, [[0, 0], [0, feature_len - current_len]])
        else:
            flattened = flattened[:, :feature_len]  # Trim if too long
        return flattened.numpy()
    except Exception as e:
        # Deliberately broad: this runs inside a UI callback, and the caller
        # turns a None result into a user-facing error message.
        print(f"Audio processing error: {str(e)}")
        return None
def predict(audio_path):
    """Classify the audio file at ``audio_path`` and return the label text.

    Returns the decoded class label on success. If preprocessing yields
    ``None`` the fixed message "Error: Couldn't process audio" is returned,
    and any exception raised along the way is reported as a
    "Prediction error: ..." string instead of propagating.
    """
    try:
        features = preprocess_audio(audio_path)
        if features is not None:
            # Debug output
            print(f"Model input shape: {features.shape}")
            # Highest-scoring class index, mapped back to its label
            scores = model.predict(features)
            best_index = np.argmax(scores)
            return label_encoder.inverse_transform([best_index])[0]
        return "Error: Couldn't process audio"
    except Exception as err:
        return f"Prediction error: {str(err)}"
# Build the web UI: one audio upload (handed to predict as a file path)
# and one label output, then start serving it.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)",
)
demo.launch()