File size: 2,032 Bytes
015979f
 
 
 
 
 
2843631
015979f
 
 
b2f5328
2843631
015979f
2843631
b2f5328
2843631
 
015979f
2843631
 
 
ec416c2
2843631
 
 
 
 
 
b2f5328
2843631
 
015979f
2843631
015979f
 
 
 
2843631
b2f5328
 
2843631
015979f
2843631
 
015979f
2843631
b2f5328
b02773c
015979f
 
2843631
015979f
2843631
015979f
 
 
 
 
2843631
b2f5328
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import tensorflow as tf
import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile

# Load the trained CNN and the fitted label encoder at import time.
# Both artifacts must sit next to this script; loading fails fast on startup
# rather than on the first prediction.
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")

def preprocess_audio(audio_path):
    """Load a WAV file and convert it to the flat feature vector the model expects.

    Args:
        audio_path: Path to a WAV file readable by ``scipy.io.wavfile``.

    Returns:
        ``np.ndarray`` of shape (1, 384), or ``None`` if processing fails.
    """
    try:
        # 1. Load audio; scipy returns (sample_rate, samples). Mix stereo
        # down to mono before any further processing.
        sr, y = wavfile.read(audio_path)
        if y.ndim > 1:
            y = np.mean(y, axis=1)
        y = y.astype(np.float32)

        # Peak-normalize. Guard against all-zero (silent) audio, which would
        # otherwise divide by zero and fill the features with NaNs.
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak

        # 2. Magnitude spectrogram (parameters must match those used in
        # training — TODO confirm frame_length/frame_step against the
        # training pipeline).
        spectrogram = tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
        spectrogram = tf.abs(spectrogram)

        # 3. Flatten, then pad or trim to the fixed model input width of 384.
        flattened = tf.reshape(spectrogram, (1, -1))
        if flattened.shape[1] < 384:
            flattened = tf.pad(flattened, [[0, 0], [0, 384 - flattened.shape[1]]])
        else:
            flattened = flattened[:, :384]

        return flattened.numpy()

    except Exception as e:
        # Best-effort: log and return None so the caller can surface a
        # friendly error instead of crashing the UI.
        print(f"Audio processing error: {str(e)}")
        return None

def predict(audio_path):
    """Classify an animal sound file and return the predicted label string."""
    try:
        # Turn the raw file into the model's expected feature vector.
        features = preprocess_audio(audio_path)
        if features is None:
            return "Error: Couldn't process audio"

        # Debug output
        print(f"Model input shape: {features.shape}")

        # Run inference, pick the most likely class, map it back to a name.
        scores = model.predict(features)
        best = np.argmax(scores)
        return label_encoder.inverse_transform([best])[0]

    except Exception as e:
        return f"Prediction error: {str(e)}"

# Create simple interface: a single audio upload mapped through `predict`
# to a label output. `type="filepath"` hands `predict` a path on disk,
# matching what `preprocess_audio` expects. `launch()` blocks and serves
# the app.
gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)"
).launch()