Spaces:

amritn8
/

AnimalSoundClassifier

Sleeping

App Files Files Community

amritn8 committed on Jul 29, 2025

Commit

b2f5328

verified ·

1 Parent(s): 7378e3e

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -74

app.py CHANGED Viewed

@@ -4,76 +4,37 @@ import numpy as np
 import gradio as gr
 from scipy.io import wavfile
 import os
-import warnings
-# Suppress sklearn version warning
-warnings.filterwarnings("ignore", category=UserWarning)
 # Load model and label encoder
 model = tf.keras.models.load_model("animal_sound_cnn.keras")
 label_encoder = joblib.load("label_encoder.joblib")
-def preprocess_audio(audio_path, target_shape=(64, 64)):
     """
-    Robust audio preprocessing with extensive error handling
     """
     try:
-        # 1. Read WAV file with error handling
-        try:
-            sr, y = wavfile.read(audio_path)
-        except Exception as e:
-            print(f"Error reading WAV file: {str(e)}")
-            return None
-        # 2. Convert to mono and float32
-        if len(y.shape) > 1:
             y = y.mean(axis=1)
-        y = y.astype(np.float32)
-        # 3. Normalize audio
-        y = y / np.max(np.abs(y))
-        # 4. Pad/trim to consistent length (3 seconds at 22050Hz)
-        target_samples = 3 * 22050
-        if len(y) > target_samples:
-            y = y[:target_samples]
-        else:
-            y = np.pad(y, (0, max(0, target_samples - len(y))), mode='constant')
-        # 5. Create spectrogram
-        spectrogram = tf.signal.stft(
-            y,
-            frame_length=1024,
-            frame_step=512,
-            fft_length=1024
-        )
         spectrogram = tf.abs(spectrogram)
-        # 6. Convert to mel scale and dB
-        num_spectrogram_bins = spectrogram.shape[-1]
-        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
-            target_shape[0],
-            num_spectrogram_bins,
-            22050,
-            20,
-            8000
-        )
-        mel_spectrogram = tf.tensordot(
-            spectrogram,
-            linear_to_mel_weight_matrix,
-            1
-        )
-        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)
-        # 7. Resize and normalize
-        log_mel_spectrogram = tf.image.resize(
-            tf.expand_dims(log_mel_spectrogram, -1),
-            target_shape
-        )
-        log_mel_spectrogram = (log_mel_spectrogram - tf.reduce_min(log_mel_spectrogram)) / \
-                             (tf.reduce_max(log_mel_spectrogram) - tf.reduce_min(log_mel_spectrogram))
-        return tf.expand_dims(log_mel_spectrogram, 0).numpy()
     except Exception as e:
         print(f"Preprocessing error: {str(e)}")
@@ -82,36 +43,34 @@ def preprocess_audio(audio_path, target_shape=(64, 64)):
 def predict(audio_path):
     try:
         # 1. Preprocess audio
-        spectrogram = preprocess_audio(audio_path)
-        if spectrogram is None:
-            return "Error: Could not process audio file"
-        # 2. Debug output
-        print(f"Input shape: {spectrogram.shape}")
-        print(f"Input range: {np.min(spectrogram)} to {np.max(spectrogram)}")
-        # 3. Predict
-        pred = model.predict(spectrogram)
         animal = label_encoder.inverse_transform([np.argmax(pred)])[0]
         return animal
     except Exception as e:
         return f"Prediction error: {str(e)}"
-# requirements.txt should include:
 # tensorflow>=2.16.0
-# scikit-learn>=1.7.1
-# joblib>=1.4.0
-# numpy>=1.24.0
-# gradio>=4.0.0
-# scipy>=1.10.0
 gr.Interface(
     fn=predict,
     inputs=gr.Audio(type="filepath"),
     outputs="label",
     title="Animal Sound Classifier",
-    description="Upload a short audio clip (2-5 seconds) of an animal sound",
     examples=["example.wav"] if os.path.exists("example.wav") else None
-).launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 from scipy.io import wavfile
 import os
 # Load the trained Keras model and the fitted label encoder once, at module
 # load, so that predict() reuses the same objects on every call. Both paths
 # are relative, so the two artifact files must be in the working directory.
 model = tf.keras.models.load_model("animal_sound_cnn.keras")
 label_encoder = joblib.load("label_encoder.joblib")
def preprocess_audio(audio_path, n_features=384):
    """Turn a WAV file into a fixed-length feature row for the classifier.

    The clip is down-mixed to mono, peak-normalised, converted to an STFT
    magnitude spectrogram, flattened, and then zero-padded or truncated to
    exactly ``n_features`` values.

    Args:
        audio_path: Path to a WAV file readable by ``scipy.io.wavfile.read``.
        n_features: Length of the returned feature row. The default of 384
            is the input width this file's comments say the model expects.

    Returns:
        A ``float32`` NumPy array of shape ``(1, n_features)``, or ``None``
        when the file cannot be read or processed (the error is printed).
    """
    try:
        # 1. Read the clip; the sample rate is not used below.
        _sample_rate, samples = wavfile.read(audio_path)
        if samples.size == 0:
            raise ValueError("audio file contains no samples")

        # Convert to float *before* taking abs(): on integer PCM data
        # np.abs() wraps around for the most negative value, which would
        # corrupt the peak used for normalisation.
        samples = samples.astype(np.float32)
        if samples.ndim > 1:  # Down-mix multi-channel audio to mono
            samples = samples.mean(axis=1)

        # A silent clip has a peak of 0; skip the division instead of
        # filling the signal with NaN.
        peak = np.max(np.abs(samples))
        if peak > 0:
            samples = samples / peak

        # 2. STFT magnitude (parameters must match the model's training)
        spectrogram = tf.abs(
            tf.signal.stft(samples, frame_length=256, frame_step=128, fft_length=256)
        )

        # 3. Flatten every frame/bin into a single row
        features = tf.reshape(spectrogram, (1, -1)).numpy().astype(np.float32)

        # 4. Zero-pad or truncate to exactly n_features columns
        missing = n_features - features.shape[1]
        if missing > 0:
            features = np.pad(features, ((0, 0), (0, missing)))
        else:
            features = features[:, :n_features]
        return features
    except Exception as e:
        print(f"Preprocessing error: {str(e)}")
        # predict() checks for None to report a preprocessing failure.
        return None
 def predict(audio_path):
     """Classify one audio clip and return the predicted animal label.

     Returns the decoded class name on success, the fixed message
     "Error processing audio" when preprocessing yields None, or a
     "Prediction error: ..." string if anything in the pipeline raises.
     """
     try:
         features = preprocess_audio(audio_path)
         if features is not None:
             # Log the input shape to help diagnose feature/model mismatches
             print(f"Model input shape: {features.shape}")
             scores = model.predict(features)
             best_class = np.argmax(scores)
             decoded = label_encoder.inverse_transform([best_class])
             return decoded[0]
         return "Error processing audio"
     except Exception as e:
         return f"Prediction error: {str(e)}"
+# Minimal requirements.txt
 # tensorflow>=2.16.0
+# scikit-learn
+# joblib
+# numpy
+# gradio
+# scipy
 # Build the web UI: one audio input (handed to predict as a file path) and
 # a label output. example.wav is offered as a sample only if it exists.
 # launch() is called with no arguments, so host and port are left to Gradio.
 gr.Interface(
     fn=predict,
     inputs=gr.Audio(type="filepath"),
     outputs="label",
     title="Animal Sound Classifier",
+    description="Upload a short animal sound clip (2-5 seconds)",
     examples=["example.wav"] if os.path.exists("example.wav") else None
+).launch()