File size: 2,032 Bytes
015979f
 
 
 
 
 
2843631
015979f
 
 
b2f5328
2843631
015979f
2843631
b2f5328
2843631
 
015979f
2843631
 
 
ec416c2
2843631
 
 
 
 
 
b2f5328
2843631
 
015979f
2843631
015979f
 
 
 
2843631
b2f5328
 
2843631
015979f
2843631
 
015979f
2843631
b2f5328
b02773c
015979f
 
2843631
015979f
2843631
015979f
 
 
 
 
2843631
b2f5328
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import tensorflow as tf
import joblib
import numpy as np
import gradio as gr
from scipy.io import wavfile

# Load the trained CNN and the fitted label encoder at import time.
# Both artifacts must sit next to this script; loading fails fast on startup
# rather than on the first prediction.
model = tf.keras.models.load_model("animal_sound_cnn.keras")
label_encoder = joblib.load("label_encoder.joblib")

def preprocess_audio(audio_path):
    """Load a WAV file and convert it to the flat feature vector the model expects.

    Args:
        audio_path: Path to a WAV file readable by ``scipy.io.wavfile``.

    Returns:
        ``np.ndarray`` of shape (1, 384), or ``None`` if processing fails.
    """
    try:
        # 1. Load audio; scipy returns (sample_rate, samples). Mix stereo
        # down to mono before any further processing.
        sr, y = wavfile.read(audio_path)
        if y.ndim > 1:
            y = np.mean(y, axis=1)
        y = y.astype(np.float32)

        # Peak-normalize. Guard against all-zero (silent) audio, which would
        # otherwise divide by zero and fill the features with NaNs.
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak

        # 2. Magnitude spectrogram (parameters must match those used in
        # training — TODO confirm frame_length/frame_step against the
        # training pipeline).
        spectrogram = tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
        spectrogram = tf.abs(spectrogram)

        # 3. Flatten, then pad or trim to the fixed model input width of 384.
        flattened = tf.reshape(spectrogram, (1, -1))
        if flattened.shape[1] < 384:
            flattened = tf.pad(flattened, [[0, 0], [0, 384 - flattened.shape[1]]])
        else:
            flattened = flattened[:, :384]

        return flattened.numpy()

    except Exception as e:
        # Best-effort: log and return None so the caller can surface a
        # friendly error instead of crashing the UI.
        print(f"Audio processing error: {str(e)}")
        return None

def predict(audio_path):
    """Classify an animal sound file and return the predicted label string."""
    try:
        # Turn the raw file into the model's expected feature vector.
        features = preprocess_audio(audio_path)
        if features is None:
            return "Error: Couldn't process audio"

        # Debug output
        print(f"Model input shape: {features.shape}")

        # Run inference, pick the most likely class, map it back to a name.
        scores = model.predict(features)
        best = np.argmax(scores)
        return label_encoder.inverse_transform([best])[0]

    except Exception as e:
        return f"Prediction error: {str(e)}"

# Create simple interface: a single audio upload mapped through `predict`
# to a label output. `type="filepath"` hands `predict` a path on disk,
# matching what `preprocess_audio` expects. `launch()` blocks and serves
# the app.
gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound (2-5 seconds)"
).launch()