Spaces:

Calotriton
/

RibbID

Runtime error

App Files Community

Calotriton committed on Jul 1, 2025

Commit

38503c5

verified ·

1 Parent(s): 7143a96

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -49

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ class EfficientNetSE(nn.Module):
         return self.classifier(x)
 # ----------------------------
-# 3) Audio preprocessing
 # ----------------------------
 def load_and_normalize(path, sr=SR, target_dBFS=-20.0):
     y, _   = librosa.load(path, sr=sr)
@@ -63,12 +63,11 @@ def load_and_normalize(path, sr=SR, target_dBFS=-20.0):
     return y * scalar
 def bandpass(y, sr=SR, low=FMIN, high=FMAX, order=6):
-    nyq = 0.5*sr
-    b,a = sps.butter(order, [low/nyq, high/nyq], btype='band')
-    return sps.filtfilt(b,a,y)
 def segment(y, sr=SR, win=DURATION, hop=1.0):
-    w = int(win*sr); h = int(hop*sr)
     if len(y) < w:
         y = np.pad(y, (0, w - len(y)))
         return [y]
@@ -83,8 +82,8 @@ def extract_log_mel(y, sr=SR, n_mels=128, hop_length=HOP, fmin=FMIN, fmax=FMAX):
 def predict_segments(fp):
     y    = load_and_normalize(fp)
-    y    = bandpass(y)
-    segs = segment(y)
     all_p = []
     with torch.no_grad():
         for seg in segs:
@@ -100,18 +99,14 @@ def predict_segments(fp):
 with open(DATA_PKL, "rb") as f:
     data = pickle.load(f)
 classes        = data["classes"]
-orig_thresholds = np.array(data["thresholds"])
 adj_thresholds = np.array(data["adj_thresholds"])
-# Rebuild encoder
 le = LabelEncoder()
 le.classes_ = np.array(classes, dtype=object)
-# Calibrators
 with open(CAL_PATH, "rb") as f:
     calibrators = pickle.load(f)
-# Load backbone & model
 backbone = torch.hub.load('pytorch/vision:v0.14.0','efficientnet_b0',pretrained=True)
 backbone.features[0][0] = nn.Conv2d(1,32,3,2,1,bias=False)
 model = EfficientNetSE(backbone, num_classes=len(le.classes_)).to(DEVICE)
@@ -122,62 +117,48 @@ model.eval()
 # 5) Inference logic
 # ----------------------------
 def infer(audio_path, sensitivity):
-    # segments → probabilities
     seg_probs = predict_segments(audio_path)
     agg       = np.percentile(seg_probs, 90, axis=0)
-    # calibrate
     calibrated = np.array([
         calibrators[i].transform([agg[i]])[0]
         for i in range(len(le.classes_))
     ])
-    # adjust thresholds
     thresholds = adj_thresholds * sensitivity
     preds = calibrated > thresholds
-    # build results
-    results = [(le.classes_[i].replace("_"," "), round(float(calibrated[i]),3))
-               for i, flag in enumerate(preds) if flag]
-    if not results:
         return "🔍 **No species confidently detected.**\nTry reducing the strictness."
-    # sort and format Markdown with italics species names
-    results.sort(key=lambda x: -x[1])
-    md = "### ✅ Detected species:\n"
-    for sp, p in results:
-        md += f"- *{sp}* — probability: {p}\n"
-    return md
 # ----------------------------
-# 6) Gradio Blocks interface
 # ----------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("# 🐸 RibbID – Amphibian species identifier\n")
-    # Intro sentence about native species
     gr.Markdown(
-        "This CNN model detects the native frog and toad species of **Catalonia** (Nort-East Spain) through ther calls."
     )
-    gr.Markdown(
-        "To start, **upload** an audio file or record a new one. Next, **select** the detection strictness in the slider, and click **submit**. Results might take time.\n"
-        "\n"
-        "**Detection strictness** controls how conservative the model is:\n"
-        "- **Lower values (0.5)** = more sensitive (may include false positives).\n"
-        "- **Higher values (1.0)** = only very confident detections (may ignore true positives)."
-    )
     with gr.Row():
-        audio = gr.Audio(type="filepath", label="Upload audio file (.wav/.mp3) or record live")
         slider = gr.Slider(0.5, 1.0, value=1.0, step=0.05,
                            label="Detection strictness")
-    output = gr.Markdown()
-    btn = gr.Button("Submit")
-    btn.click(
-        fn=infer,
-        inputs=[audio, slider],
-        outputs=[output],
-        show_progress=True
-    )
 if __name__ == "__main__":
-    demo.launch(share=False)

         return self.classifier(x)
 # ----------------------------
+# 3) Audio preprocessing functions
 # ----------------------------
 def load_and_normalize(path, sr=SR, target_dBFS=-20.0):
     y, _   = librosa.load(path, sr=sr)
     return y * scalar
 def bandpass(y, sr=SR, low=FMIN, high=FMAX, order=6):
+    sos = sps.butter(order, [low, high], btype='band', fs=sr, output='sos')
+    return sps.sosfilt(sos, y)
 def segment(y, sr=SR, win=DURATION, hop=1.0):
+    w, h = int(win*sr), int(hop*sr)
     if len(y) < w:
         y = np.pad(y, (0, w - len(y)))
         return [y]
 def predict_segments(fp):
     y    = load_and_normalize(fp)
+    y    = bandpass(y, sr=SR)
+    segs = segment(y, sr=SR)
     all_p = []
     with torch.no_grad():
         for seg in segs:
 with open(DATA_PKL, "rb") as f:
     data = pickle.load(f)
 classes        = data["classes"]
 adj_thresholds = np.array(data["adj_thresholds"])
 le = LabelEncoder()
 le.classes_ = np.array(classes, dtype=object)
 with open(CAL_PATH, "rb") as f:
     calibrators = pickle.load(f)
 backbone = torch.hub.load('pytorch/vision:v0.14.0','efficientnet_b0',pretrained=True)
 backbone.features[0][0] = nn.Conv2d(1,32,3,2,1,bias=False)
 model = EfficientNetSE(backbone, num_classes=len(le.classes_)).to(DEVICE)
 # 5) Inference logic
 # ----------------------------
 def infer(audio_path, sensitivity):
     seg_probs = predict_segments(audio_path)
     agg       = np.percentile(seg_probs, 90, axis=0)
     calibrated = np.array([
         calibrators[i].transform([agg[i]])[0]
         for i in range(len(le.classes_))
     ])
     thresholds = adj_thresholds * sensitivity
     preds = calibrated > thresholds
+    detected = [(le.classes_[i].replace("_"," "), round(float(calibrated[i]),3))
+                for i, flag in enumerate(preds) if flag]
+    if not detected:
         return "🔍 **No species confidently detected.**\nTry reducing the strictness."
+    detected.sort(key=lambda x: -x[1])
+    md = "<h3 style='color:#2b7a78;'>✅ Detected Species</h3><ul>"
+    for sp, p in detected:
+        md += f"<li><em>{sp}</em> — probability: <strong>{p}</strong></li>"
+    md += "</ul>"
+    return gr.HTML(md)
 # ----------------------------
+# 6) Gradio Blocks UI
 # ----------------------------
+custom_css = '''
+body { background-color: #f0f8ff; }
+h1, h3 { font-family: 'Helvetica Neue', sans-serif; }
+.gr-button { background-color: #2b7a78 !important; color: white !important; }
+'''
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML("<h1 style='text-align:center; color:#17252a;'>🐸 RibbID</h1>")
+    gr.HTML("<p style='text-align:center;'>Detects native frog and toad species of Catalonia from audio calls.</p>")
     gr.Markdown(
+        "**Strictness** controls detection sensitivity. Lower=more sensitive, higher=more conservative."
     )
     with gr.Row():
+        audio = gr.Audio(type="filepath", label="Upload or record audio (.wav/.mp3)")
         slider = gr.Slider(0.5, 1.0, value=1.0, step=0.05,
                            label="Detection strictness")
+    output = gr.HTML()
+    submit = gr.Button("🔍 Identify Species")
+    submit.click(fn=infer, inputs=[audio, slider], outputs=[output], show_progress=True)
 if __name__ == "__main__":
+    demo.launch(share=False)