Spaces:
Sleeping
Sleeping
| import tensorflow as tf | |
| import joblib | |
| import numpy as np | |
| import gradio as gr | |
| from scipy.io import wavfile | |
# Load the training-time artifacts: the trained CNN and the label encoder
# that maps class indices back to animal names.
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")
def preprocess_audio(audio_path):
    """Load a WAV file and convert it to the flat (1, 384) feature vector the model expects.

    Pipeline: read -> mono mix-down -> peak normalization -> STFT magnitude
    spectrogram -> flatten -> zero-pad or trim to exactly 384 values.
    STFT parameters (frame_length=256, frame_step=128, fft_length=256) must
    match whatever was used at training time — TODO confirm against the
    training script.

    Args:
        audio_path: Path to a WAV file on disk.

    Returns:
        numpy.ndarray of shape (1, 384) on success, or None on any failure
        (best-effort contract: predict() turns None into a user-facing error).
    """
    try:
        # 1. Load audio; mix stereo down to mono by averaging channels.
        sr, y = wavfile.read(audio_path)
        if y.ndim > 1:
            y = np.mean(y, axis=1)
        y = y.astype(np.float32)

        # Guard: an empty clip would make np.max raise, and an all-silent
        # clip would divide by zero below and fill the features with NaNs.
        if y.size == 0:
            raise ValueError("empty audio file")
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak  # Peak-normalize to [-1, 1]

        # 2. Magnitude spectrogram.
        spectrogram = tf.abs(
            tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
        )

        # 3. Flatten, then pad with zeros or trim so the model always
        #    receives exactly (1, 384).
        flattened = tf.reshape(spectrogram, (1, -1))
        width = int(flattened.shape[1])
        if width < 384:
            flattened = tf.pad(flattened, [[0, 0], [0, 384 - width]])
        else:
            flattened = flattened[:, :384]
        return flattened.numpy()
    except Exception as e:
        # Best-effort: log and signal failure via None so the Gradio handler
        # can show an error message instead of crashing the UI.
        print(f"Audio processing error: {str(e)}")
        return None
def predict(audio_path):
    """Gradio handler: classify an uploaded clip and return the animal name.

    On failure, returns a human-readable error string instead of raising,
    since Gradio displays whatever string comes back.
    """
    try:
        features = preprocess_audio(audio_path)
        if features is None:
            return "Error: Couldn't process audio"

        # Debug output
        print(f"Model input shape: {features.shape}")

        # Pick the highest-scoring class and map it back to its label.
        scores = model.predict(features)
        best_idx = int(np.argmax(scores))
        return label_encoder.inverse_transform([best_idx])[0]
    except Exception as e:
        return f"Prediction error: {str(e)}"
# Build the UI and start serving (blocks until the server stops).
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)",
)
demo.launch()