AKMESSI committed on
Commit
fa148af
·
verified ·
1 Parent(s): acd11c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -72
app.py CHANGED
@@ -1,15 +1,15 @@
1
- # ===============================
2
- # Mumbai Bird Call Identifier
3
- # FINAL PRODUCTION app.py
4
- # ===============================
5
 
6
  import streamlit as st
7
  import torch
8
  import torchaudio
9
  import numpy as np
 
 
10
  from torchvision import models, transforms
11
  from PIL import Image
12
- import io
13
 
14
  # ================== PAGE CONFIG ==================
15
  st.set_page_config(
@@ -26,6 +26,9 @@ TARGET_SAMPLES = SAMPLE_RATE * DURATION
26
  HIGH_CONF = 0.60
27
  MEDIUM_CONF = 0.35
28
 
 
 
 
29
  # ================== LOAD MODEL ==================
30
  @st.cache_resource
31
  def load_model():
@@ -77,27 +80,21 @@ val_transform = transforms.Compose([
77
  )
78
  ])
79
 
80
- # ================== UI HEADER ==================
81
  st.title("🐦 Mumbai Balcony Bird Call Identifier")
82
  st.markdown(
83
  """
84
- Identify **204 Indian bird species** using their calls.
85
- Trained on **real Mumbai & Maharashtra recordings**.
86
-
87
- 📌 **Best results:**
88
- ‒ Record early morning / evening
89
- ‒ Minimal background noise
90
- ‒ 5–10 seconds duration
91
  """
92
  )
93
 
94
- # ================== FILE UPLOAD ==================
95
  audio_file = st.file_uploader(
96
  "Upload bird call audio (WAV / MP3 / M4A / OGG)",
97
  type=["wav", "mp3", "m4a", "ogg"]
98
  )
99
 
100
- # ================== MAIN LOGIC ==================
101
  if audio_file:
102
  st.audio(audio_file)
103
 
@@ -131,11 +128,15 @@ if audio_file:
131
  mel = db_transform(mel)
132
  mel = mel.squeeze(0)
133
 
134
- # -------- MEL → IMAGE (PNG STYLE) --------
135
  mel_np = mel.numpy()
136
- mel_img = (mel_np - mel_np.min()) / (mel_np.ptp() + 1e-8)
137
- mel_img = (mel_img * 255).astype(np.uint8)
138
- mel_pil = Image.fromarray(mel_img).convert("RGB")
 
 
 
 
139
 
140
  # -------- MODEL INPUT --------
141
  model_input = val_transform(mel_pil).unsqueeze(0)
@@ -153,80 +154,32 @@ if audio_file:
153
  top1_prob = top5_probs[0].item()
154
  top1_species = class_names[top5_idx[0]]
155
 
156
- # -------- CONFIDENCE STATUS --------
157
  if top1_prob >= HIGH_CONF:
158
  st.success("✅ High confidence identification")
159
- confidence_label = "High"
160
  elif top1_prob >= MEDIUM_CONF:
161
  st.warning("⚠️ Medium confidence identification")
162
- confidence_label = "Medium"
163
  else:
164
  st.error("❓ Low confidence – possibly unknown species")
165
- confidence_label = "Low"
166
 
167
  st.markdown(f"## 🐦 {top1_species}")
 
168
 
169
- st.metric(
170
- label="Confidence",
171
- value=f"{top1_prob*100:.1f}%",
172
- delta=confidence_label
173
- )
174
-
175
- # -------- TOP 5 --------
176
  st.markdown("### 🔍 Other possible matches")
177
  for i in range(1, 5):
178
  st.markdown(
179
  f"- **{class_names[top5_idx[i]]}** — {top5_probs[i].item():.1%}"
180
  )
181
 
182
- # -------- LOW CONFIDENCE EXPLANATION --------
183
- if top1_prob < MEDIUM_CONF:
184
- st.info(
185
- """
186
- This recording may contain:
187
- - A species not in the dataset
188
- - Juvenile or alarm calls
189
- - Multiple birds calling
190
- - Background noise
191
-
192
- Try recording again in a quieter environment.
193
- """
194
- )
195
-
196
- # -------- SPECTROGRAM --------
197
  st.markdown("---")
198
- st.subheader("📊 Mel Spectrogram Used by the Model")
199
- st.image(
200
- mel_img,
201
- caption="Frequency (vertical) vs Time (horizontal)",
202
- use_container_width=True
203
- )
204
-
205
- # -------- SPECIES INFO CARD --------
206
- with st.expander("📖 About this bird"):
207
- st.markdown(
208
- f"""
209
- **Scientific name:** {top1_species}
210
- **Region:** Mumbai & Maharashtra
211
- **Typical habitat:** Urban areas, forest edges, wetlands
212
- **Best time to hear:** Early morning or dusk
213
- """
214
- )
215
 
216
  else:
217
  st.info("👆 Upload a bird call audio file to begin")
218
 
219
- col1, col2, col3 = st.columns(3)
220
- col1.metric("Species Covered", "204")
221
- col2.metric("Training Calls", "8000+")
222
- col3.metric("Region", "Mumbai & MH")
223
-
224
  # ================== FOOTER ==================
225
  st.markdown("---")
226
  st.caption(
227
- "⚠️ AI predictions are probabilistic. "
228
- "For critical identifications, verify using field guides or experts."
229
- )
230
- st.caption(
231
- "Model trained on real bird call recordings sourced from citizen science datasets."
232
  )
 
1
+ # ==========================================
2
+ # Mumbai Bird Call Identifier — FINAL FIX
3
+ # ==========================================
 
4
 
5
  import streamlit as st
6
  import torch
7
  import torchaudio
8
  import numpy as np
9
+ import io
10
+ import matplotlib.cm as cm
11
  from torchvision import models, transforms
12
  from PIL import Image
 
13
 
14
  # ================== PAGE CONFIG ==================
15
  st.set_page_config(
 
26
  HIGH_CONF = 0.60
27
  MEDIUM_CONF = 0.35
28
 
29
+ # ⚠️ MUST MATCH TRAINING
30
+ SPECTROGRAM_COLORMAP = "magma" # ← change ONLY if training used something else
31
+
32
  # ================== LOAD MODEL ==================
33
  @st.cache_resource
34
  def load_model():
 
80
  )
81
  ])
82
 
83
+ # ================== UI ==================
84
  st.title("🐦 Mumbai Balcony Bird Call Identifier")
85
  st.markdown(
86
  """
87
+ Identify **204 Indian bird species** from their calls.
88
+ Model trained on **PNG spectrogram images** from Mumbai & Maharashtra.
 
 
 
 
 
89
  """
90
  )
91
 
 
92
  audio_file = st.file_uploader(
93
  "Upload bird call audio (WAV / MP3 / M4A / OGG)",
94
  type=["wav", "mp3", "m4a", "ogg"]
95
  )
96
 
97
+ # ================== PROCESS ==================
98
  if audio_file:
99
  st.audio(audio_file)
100
 
 
128
  mel = db_transform(mel)
129
  mel = mel.squeeze(0)
130
 
131
+ # -------- MEL → COLORED PNG (CRITICAL FIX) --------
132
  mel_np = mel.numpy()
133
+ mel_norm = (mel_np - mel_np.min()) / (mel_np.ptp() + 1e-8)
134
+
135
+ cmap = cm.get_cmap(SPECTROGRAM_COLORMAP)
136
+ colored = cmap(mel_norm)[:, :, :3] # drop alpha
137
+ mel_img = (colored * 255).astype(np.uint8)
138
+
139
+ mel_pil = Image.fromarray(mel_img)
140
 
141
  # -------- MODEL INPUT --------
142
  model_input = val_transform(mel_pil).unsqueeze(0)
 
154
  top1_prob = top5_probs[0].item()
155
  top1_species = class_names[top5_idx[0]]
156
 
 
157
  if top1_prob >= HIGH_CONF:
158
  st.success("✅ High confidence identification")
 
159
  elif top1_prob >= MEDIUM_CONF:
160
  st.warning("⚠️ Medium confidence identification")
 
161
  else:
162
  st.error("❓ Low confidence – possibly unknown species")
 
163
 
164
  st.markdown(f"## 🐦 {top1_species}")
165
+ st.metric("Confidence", f"{top1_prob*100:.1f}%")
166
 
 
 
 
 
 
 
 
167
  st.markdown("### 🔍 Other possible matches")
168
  for i in range(1, 5):
169
  st.markdown(
170
  f"- **{class_names[top5_idx[i]]}** — {top5_probs[i].item():.1%}"
171
  )
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  st.markdown("---")
174
+ st.subheader("📊 Spectrogram used by the model")
175
+ st.image(mel_img, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  else:
178
  st.info("👆 Upload a bird call audio file to begin")
179
 
 
 
 
 
 
180
  # ================== FOOTER ==================
181
  st.markdown("---")
182
  st.caption(
183
+ "⚠️ This model predicts among known species only. "
184
+ "Low confidence may indicate an unseen species or noisy audio."
 
 
 
185
  )