import tensorflow as tf
import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile

# Load model and label encoder (trained offline; encoder maps class index -> animal name).
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")

# Flattened spectrogram length the model was trained on.
INPUT_LEN = 384


def preprocess_audio(audio_path):
    """Load a WAV file and convert it into the model's flat spectrogram input.

    Args:
        audio_path: Path to a WAV file readable by scipy.io.wavfile.

    Returns:
        A numpy array of shape (1, INPUT_LEN), or None if processing failed.
    """
    try:
        # 1. Load audio file (convert to mono if stereo by averaging channels).
        #    wavfile.read typically yields integer PCM, hence the float cast below.
        sr, y = wavfile.read(audio_path)
        y = np.mean(y, axis=1) if len(y.shape) > 1 else y
        y = y.astype(np.float32)
        # Normalize to [-1, 1]; guard against all-silent input, which would
        # otherwise divide by zero and fill the input with NaNs.
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak

        # 2. Create magnitude spectrogram (adjust these parameters to match your training)
        spectrogram = tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
        spectrogram = tf.abs(spectrogram)  # Magnitude

        # 3. Flatten, then zero-pad or trim so the model always receives (1, INPUT_LEN).
        flattened = tf.reshape(spectrogram, (1, -1))  # Flatten all
        width = flattened.shape[1]
        if width < INPUT_LEN:
            flattened = tf.pad(flattened, [[0, 0], [0, INPUT_LEN - width]])
        else:
            flattened = flattened[:, :INPUT_LEN]  # Trim if too long
        return flattened.numpy()
    except Exception as e:
        print(f"Audio processing error: {str(e)}")
        return None


def predict(audio_path):
    """Classify an uploaded animal sound.

    Args:
        audio_path: Filepath supplied by the Gradio Audio component.

    Returns:
        The predicted animal label as a string, or an error message string.
    """
    try:
        # Process audio
        processed = preprocess_audio(audio_path)
        if processed is None:
            return "Error: Couldn't process audio"

        # Debug output
        print(f"Model input shape: {processed.shape}")

        # Predict and return animal name
        pred = model.predict(processed)
        return label_encoder.inverse_transform([np.argmax(pred)])[0]
    except Exception as e:
        return f"Prediction error: {str(e)}"


# Create simple interface
gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)",
).launch()