amritn8 commited on
Commit
b2f5328
·
verified ·
1 Parent(s): 7378e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -74
app.py CHANGED
@@ -4,76 +4,37 @@ import numpy as np
4
  import gradio as gr
5
  from scipy.io import wavfile
6
  import os
7
- import warnings
8
-
9
- # Suppress sklearn version warning
10
- warnings.filterwarnings("ignore", category=UserWarning)
11
 
12
  # Load model and label encoder
13
  model = tf.keras.models.load_model("animal_sound_cnn.keras")
14
  label_encoder = joblib.load("label_encoder.joblib")
15
 
16
- def preprocess_audio(audio_path, target_shape=(64, 64)):
17
  """
18
- Robust audio preprocessing with extensive error handling
 
19
  """
20
  try:
21
- # 1. Read WAV file with error handling
22
- try:
23
- sr, y = wavfile.read(audio_path)
24
- except Exception as e:
25
- print(f"Error reading WAV file: {str(e)}")
26
- return None
27
-
28
- # 2. Convert to mono and float32
29
- if len(y.shape) > 1:
30
  y = y.mean(axis=1)
31
- y = y.astype(np.float32)
32
-
33
- # 3. Normalize audio
34
- y = y / np.max(np.abs(y))
35
-
36
- # 4. Pad/trim to consistent length (3 seconds at 22050Hz)
37
- target_samples = 3 * 22050
38
- if len(y) > target_samples:
39
- y = y[:target_samples]
40
- else:
41
- y = np.pad(y, (0, max(0, target_samples - len(y))), mode='constant')
42
 
43
- # 5. Create spectrogram
44
- spectrogram = tf.signal.stft(
45
- y,
46
- frame_length=1024,
47
- frame_step=512,
48
- fft_length=1024
49
- )
50
  spectrogram = tf.abs(spectrogram)
51
 
52
- # 6. Convert to mel scale and dB
53
- num_spectrogram_bins = spectrogram.shape[-1]
54
- linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
55
- target_shape[0],
56
- num_spectrogram_bins,
57
- 22050,
58
- 20,
59
- 8000
60
- )
61
- mel_spectrogram = tf.tensordot(
62
- spectrogram,
63
- linear_to_mel_weight_matrix,
64
- 1
65
- )
66
- log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)
67
 
68
- # 7. Resize and normalize
69
- log_mel_spectrogram = tf.image.resize(
70
- tf.expand_dims(log_mel_spectrogram, -1),
71
- target_shape
72
- )
73
- log_mel_spectrogram = (log_mel_spectrogram - tf.reduce_min(log_mel_spectrogram)) / \
74
- (tf.reduce_max(log_mel_spectrogram) - tf.reduce_min(log_mel_spectrogram))
75
-
76
- return tf.expand_dims(log_mel_spectrogram, 0).numpy()
77
 
78
  except Exception as e:
79
  print(f"Preprocessing error: {str(e)}")
@@ -82,36 +43,34 @@ def preprocess_audio(audio_path, target_shape=(64, 64)):
82
  def predict(audio_path):
83
  try:
84
  # 1. Preprocess audio
85
- spectrogram = preprocess_audio(audio_path)
86
- if spectrogram is None:
87
- return "Error: Could not process audio file"
88
 
89
- # 2. Debug output
90
- print(f"Input shape: {spectrogram.shape}")
91
- print(f"Input range: {np.min(spectrogram)} to {np.max(spectrogram)}")
92
 
93
- # 3. Predict
94
- pred = model.predict(spectrogram)
95
  animal = label_encoder.inverse_transform([np.argmax(pred)])[0]
96
-
97
  return animal
98
 
99
  except Exception as e:
100
  return f"Prediction error: {str(e)}"
101
 
102
- # requirements.txt should include:
103
  # tensorflow>=2.16.0
104
- # scikit-learn>=1.7.1
105
- # joblib>=1.4.0
106
- # numpy>=1.24.0
107
- # gradio>=4.0.0
108
- # scipy>=1.10.0
109
 
110
  gr.Interface(
111
  fn=predict,
112
  inputs=gr.Audio(type="filepath"),
113
  outputs="label",
114
  title="Animal Sound Classifier",
115
- description="Upload a short audio clip (2-5 seconds) of an animal sound",
116
  examples=["example.wav"] if os.path.exists("example.wav") else None
117
- ).launch(server_name="0.0.0.0", server_port=7860)
 
4
  import gradio as gr
5
  from scipy.io import wavfile
6
  import os
 
 
 
 
7
 
8
# Load the serialized artifacts once at startup: the label encoder
# (sklearn, via joblib) and the trained Keras classifier.
label_encoder = joblib.load("label_encoder.joblib")
model = tf.keras.models.load_model("animal_sound_cnn.keras")
11
 
12
def preprocess_audio(audio_path):
    """Convert a WAV file into the model's expected input.

    Reads the file with scipy, mixes down to mono, peak-normalizes,
    computes an STFT magnitude spectrogram, then flattens and
    pads/trims it to exactly 384 features.

    Args:
        audio_path: Path to a WAV file readable by scipy.io.wavfile.

    Returns:
        A float32 numpy array of shape (1, 384) ready for
        ``model.predict``, or ``None`` if preprocessing failed.
    """
    try:
        # 1. Read and normalize audio
        sr, y = wavfile.read(audio_path)
        if len(y.shape) > 1:  # Convert stereo to mono
            y = y.mean(axis=1)
        y = y.astype(np.float32)

        # Guard against empty or silent clips: np.max raises on an
        # empty array and dividing by a zero peak would produce NaNs.
        peak = np.max(np.abs(y)) if y.size else 0.0
        if peak == 0.0:
            print("Preprocessing error: audio is empty or silent")
            return None
        y = y / peak

        # 2. Create spectrogram (parameters must match the model's training
        #    configuration — TODO confirm against the training pipeline)
        spectrogram = tf.signal.stft(y, frame_length=256, frame_step=128, fft_length=256)
        spectrogram = tf.abs(spectrogram)

        # 3. Flatten to match model's expected input shape (1, 384)
        flattened = tf.reshape(spectrogram, (1, -1))

        # 4. Pad or trim to exactly 384 features
        if flattened.shape[1] < 384:
            flattened = tf.pad(flattened, [[0, 0], [0, 384 - flattened.shape[1]]])
        else:
            flattened = flattened[:, :384]

        return flattened.numpy().astype(np.float32)

    except Exception as e:
        # Caller (predict) treats None as "could not process".
        print(f"Preprocessing error: {str(e)}")
        return None
 
43
def predict(audio_path):
    """Classify an animal sound clip and return the predicted label.

    Returns the decoded class name on success, or a human-readable
    error string if preprocessing or prediction fails.
    """
    try:
        # Preprocess into the flattened input the model expects.
        features = preprocess_audio(audio_path)
        if features is None:
            return "Error processing audio"

        # Debug output
        print(f"Model input shape: {features.shape}")

        # Run inference and decode the winning class index.
        probabilities = model.predict(features)
        winner = np.argmax(probabilities)
        return label_encoder.inverse_transform([winner])[0]

    except Exception as e:
        return f"Prediction error: {str(e)}"
60
 
61
+ # Minimal requirements.txt
62
  # tensorflow>=2.16.0
63
+ # scikit-learn
64
+ # joblib
65
+ # numpy
66
+ # gradio
67
+ # scipy
68
 
69
# Build the Gradio UI and start serving. The optional example clip is
# only advertised when it actually exists on disk.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="label",
    title="Animal Sound Classifier",
    description="Upload a short animal sound clip (2-5 seconds)",
    examples=["example.wav"] if os.path.exists("example.wav") else None,
)
demo.launch()