Spaces:
Sleeping
Sleeping
File size: 2,032 Bytes
015979f 2843631 015979f b2f5328 2843631 015979f 2843631 b2f5328 2843631 015979f 2843631 ec416c2 2843631 b2f5328 2843631 015979f 2843631 015979f 2843631 b2f5328 2843631 015979f 2843631 015979f 2843631 b2f5328 b02773c 015979f 2843631 015979f 2843631 015979f 2843631 b2f5328 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import tensorflow as tf
import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile
# Artifact files read once at start-up; both paths are relative to the
# current working directory.
MODEL_PATH = "animal_sound_cnn.keras"
LABEL_ENCODER_PATH = "label_encoder.joblib"

# Keras model used by predict() to score preprocessed audio features.
model = tf.keras.models.load_model(MODEL_PATH)
# Encoder used by predict() to turn the winning class index back into a name.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
label_encoder = joblib.load(LABEL_ENCODER_PATH)
def preprocess_audio(audio_path, *, frame_length=256, frame_step=128, target_length=384):
    """Turn a WAV file into the fixed-length feature row fed to the model.

    The clip is read with ``scipy.io.wavfile``, averaged down to one channel
    when it has several, peak-normalized, converted to an STFT magnitude
    spectrogram and flattened. The flattened vector is then trimmed or
    zero-padded so the result always has ``target_length`` columns.

    Args:
        audio_path: Path of the WAV file to read.
        frame_length: STFT window size in samples; also used as the FFT length.
        frame_step: Hop between successive STFT windows, in samples.
        target_length: Number of feature values in the returned row.

    Returns:
        A NumPy array of shape ``(1, target_length)``, or ``None`` when the
        file cannot be read or processed (the error is printed).
    """
    try:
        # 1. Load audio file. The sample rate is read but not used: no
        #    resampling is performed.
        _sample_rate, samples = wavfile.read(audio_path)
        if samples.size == 0:
            raise ValueError("audio file contains no samples")

        # Convert to mono by averaging channels when the file is multi-channel.
        if samples.ndim > 1:
            samples = np.mean(samples, axis=1)

        # Cast BEFORE taking abs(): on integer PCM, abs(-32768) overflows
        # int16 and would corrupt the peak estimate.
        samples = samples.astype(np.float32)
        peak = np.max(np.abs(samples))
        # A completely silent clip has peak 0; dividing would fill the
        # features with NaN, so leave the (all-zero) signal untouched.
        if peak > 0:
            samples = samples / peak

        # 2. Create magnitude spectrogram (adjust the parameters to match
        #    the ones used in training).
        spectrogram = tf.signal.stft(
            samples,
            frame_length=frame_length,
            frame_step=frame_step,
            fft_length=frame_length,
        )
        magnitude = tf.abs(spectrogram)

        # 3. Flatten to a single row and force it to target_length columns.
        # NOTE(review): with the defaults each frame has 129 frequency bins,
        # so trimming to 384 keeps only about the first three frames of the
        # clip - confirm this matches how the model was trained.
        flattened = tf.reshape(magnitude, (1, -1))[:, :target_length]
        missing = target_length - flattened.shape[1]
        if missing > 0:
            flattened = tf.pad(flattened, [[0, 0], [0, missing]])
        return flattened.numpy()
    except Exception as e:
        # Best-effort boundary: callers treat None as "could not process".
        print(f"Audio processing error: {str(e)}")
        return None
def predict(audio_path):
    """Classify the audio file at ``audio_path`` and return the label text.

    Returns the decoded class name on success. When preprocessing yields
    ``None`` a fixed error string is returned instead, and any exception
    raised along the way is reported as a "Prediction error: ..." string
    rather than propagated.
    """
    try:
        features = preprocess_audio(audio_path)
        if features is not None:
            # Debug output
            print(f"Model input shape: {features.shape}")
            # Score the features, then map the best class index to its name.
            scores = model.predict(features)
            best_index = np.argmax(scores)
            decoded = label_encoder.inverse_transform([best_index])
            return decoded[0]
        return "Error: Couldn't process audio"
    except Exception as err:
        return f"Prediction error: {str(err)}"
# Build the web UI: one uploaded audio file (passed to predict() as a file
# path) in, one predicted label out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)",
)

# Start the app as soon as this module is executed.
demo.launch()