Update app.py
app.py
CHANGED
@@ -3,82 +3,58 @@ import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile
-import os

-# Load
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")

-def get_model_input_shape():
-    """Dynamically get the model's expected input shape"""
-    if len(model.input_shape) == 2:
-        return model.input_shape[1]  # For (None, 384) shape
-    elif len(model.input_shape) == 4:
-        return model.input_shape[1:]  # For (None, 64, 64, 1) shape
-    return None
-
def preprocess_audio(audio_path):
-    """
    try:
-        # 1. Load
        sr, y = wavfile.read(audio_path)
-        y = np.mean(y, axis=1) if len(y.shape) > 1 else y
-        y = y.astype(np.float32) / np.max(np.abs(y))
-
-        # 2. Create spectrogram
-        n_fft = 512
-        hop_length = 256
-        stft = tf.signal.stft(y, frame_length=n_fft, frame_step=hop_length, fft_length=n_fft)
-        spectrogram = tf.abs(stft)

-        #
-

-
-
-
-
-
-
-            return flattened.numpy().astype(np.float32)
-
-        else:  # Image-like input (64, 64, 1)
-            # Convert to mel spectrogram
-            linear_to_mel = tf.signal.linear_to_mel_weight_matrix(
-                num_mel_bins=64,
-                num_spectrogram_bins=spectrogram.shape[-1],
-                sample_rate=22050,
-                lower_edge_hertz=125,
-                upper_edge_hertz=7500)
-            mel_spectrogram = tf.tensordot(spectrogram, linear_to_mel, 1)
-            log_mel = tf.math.log(mel_spectrogram + 1e-6)
-
-            # Resize and add channel dimension
-            resized = tf.image.resize(tf.expand_dims(log_mel, -1), (64, 64))
-            return tf.expand_dims(resized, 0).numpy().astype(np.float32)

    except Exception as e:
-        print(f"
        return None

def predict(audio_path):
    try:
        processed = preprocess_audio(audio_path)
        if processed is None:
-            return "Error:

-

        pred = model.predict(processed)
        return label_encoder.inverse_transform([np.argmax(pred)])[0]

    except Exception as e:
-        return f"Prediction

gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
-
).launch()
import numpy as np
import gradio as gr
from scipy.io import wavfile

+# Load model and label encoder
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")

def preprocess_audio(audio_path):
+    """Simple audio preprocessing for animal sounds"""
    try:
+        # 1. Load audio file (convert to mono if stereo)
        sr, y = wavfile.read(audio_path)
+        y = np.mean(y, axis=1) if len(y.shape) > 1 else y
+        y = y.astype(np.float32) / np.max(np.abs(y))  # Normalize

+        # 2. Create spectrogram (adjust these parameters to match your training)
+        spectrogram = tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
+        spectrogram = tf.abs(spectrogram)  # Magnitude

+        # 3. Reshape to what your model expects (1, 384)
+        flattened = tf.reshape(spectrogram, (1, -1))  # Flatten all
+        if flattened.shape[1] < 384:
+            flattened = tf.pad(flattened, [[0, 0], [0, 384-flattened.shape[1]]])
+        else:
+            flattened = flattened[:, :384]  # Trim if too long

+        return flattened.numpy()
+
    except Exception as e:
+        print(f"Audio processing error: {str(e)}")
        return None

def predict(audio_path):
    try:
+        # Process audio
        processed = preprocess_audio(audio_path)
        if processed is None:
+            return "Error: Couldn't process audio"

+        # Debug output
+        print(f"Model input shape: {processed.shape}")

+        # Predict and return animal name
        pred = model.predict(processed)
        return label_encoder.inverse_transform([np.argmax(pred)])[0]

    except Exception as e:
+        return f"Prediction error: {str(e)}"

+# Create simple interface
gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
+    description="Upload a short animal sound (2-5 seconds)"
).launch()
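With fft_length=256 the STFT yields 129 frequency bins per frame, so the flattened length depends on the clip duration; the pad/trim step above is what forces it to the model's fixed 384-feature input. A minimal local sanity check along the lines below can confirm that shape before redeploying the Space; the filename sample.wav is only a placeholder, and model, label_encoder and np are assumed to be the objects already defined in app.py.

# Hypothetical local check, run in the same module as app.py (sample.wav is a placeholder)
processed = preprocess_audio("sample.wav")
if processed is None:
    print("Preprocessing failed; see the error printed above")
else:
    print("Processed shape:", processed.shape)  # expected: (1, 384)
    assert processed.shape == (1, 384), "Shape mismatch with the model input"
    pred = model.predict(processed)
    print("Predicted label:", label_encoder.inverse_transform([np.argmax(pred)])[0])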