Spaces:

fevot
/

iti110

Build error

App Files Community

fevot committed on Feb 26, 2025

Commit

60eeb55

verified ·

1 Parent(s): 1526231

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -18

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import json
 from torchvision import models
 import librosa
-# Define the BirdCallRNN model (unchanged)
 class BirdCallRNN(nn.Module):
     def __init__(self, resnet, num_features, num_classes):
         super(BirdCallRNN, self).__init__()
@@ -21,7 +21,7 @@ class BirdCallRNN(nn.Module):
         features = self.resnet(x)
         features = features.view(batch, seq_len, -1)
         rnn_out, _ = self.rnn(features)
-        output = self.fc(rnn_out[:, -1, :])
         return output
 # Function to convert MP3 to mel spectrogram (unchanged)
@@ -45,7 +45,7 @@ def mp3_to_mel_spectrogram(mp3_file, target_shape=(128, 500), resize_shape=(224,
 with open('class_mapping.json', 'r') as f:
     class_names = json.load(f)
-# Revised inference function to include confidence scores
 def infer_birdcall(model, mp3_file, segment_length=500, device="cuda"):
     model.eval()
     # Load audio and compute mel spectrogram
@@ -67,13 +67,9 @@ def infer_birdcall(model, mp3_file, segment_length=500, device="cuda"):
         # Create a tensor with batch size 1 and sequence length 1
         seg_tensor = torch.from_numpy(seg_rgb).permute(2, 0, 1).float().unsqueeze(0).unsqueeze(0).to(device)  # Shape: (1, 1, 3, 224, 224)
         output = model(seg_tensor)
-        # Apply softmax to get probabilities
-        probs = torch.softmax(output, dim=1)
-        confidence, pred_idx = torch.max(probs, dim=1)
-        pred_idx = pred_idx.cpu().numpy()[0]
-        confidence = confidence.cpu().numpy()[0]
-        predicted_bird = class_names[str(pred_idx)]
-        predictions.append((predicted_bird, confidence))
     return predictions
 # Initialize the model
@@ -87,20 +83,20 @@ model.to(device)
 model.load_state_dict(torch.load('model_weights.pth', map_location=device))
 model.eval()
-# Prediction function with confidence scores
 def predict_bird(file_path):
     predictions = infer_birdcall(model, file_path, segment_length=500, device=str(device))
-    # Format predictions as a numbered list with confidence scores
-    formatted_predictions = "\n".join([f"{i+1}. {pred} (Confidence: {conf*100:.2f}%)" for i, (pred, conf) in enumerate(predictions)])
-    return formatted_predictions
-# Custom Gradio interface
 def gradio_interface(file_path):
-    # Predict bird species with confidence
     prediction = predict_bird(file_path)
     # Display the uploaded MP3 file with a play button
-    audio_player = gr.Audio(file_path, label="Uploaded MP3 File", visible=True, autoplay=False)
     # Display images with titles
     bird_species_image = gr.Image("1.jpg", label="Bird Species")
@@ -121,4 +117,4 @@ interface = gr.Interface(
         gr.Image(label="Bird Origins")
     ]
 )
-interface.launch(share=True)

 from torchvision import models
 import librosa
+# Define the BirdCallRNN model
 class BirdCallRNN(nn.Module):
     def __init__(self, resnet, num_features, num_classes):
         super(BirdCallRNN, self).__init__()
         features = self.resnet(x)
         features = features.view(batch, seq_len, -1)
         rnn_out, _ = self.rnn(features)
+        output = self.fc(rnn_out[:, -1, :])  # Note: We’ll use this for single-segment sequences
         return output
 # Function to convert MP3 to mel spectrogram (unchanged)
 with open('class_mapping.json', 'r') as f:
     class_names = json.load(f)
+# Revised inference function to predict per segment
 def infer_birdcall(model, mp3_file, segment_length=500, device="cuda"):
     model.eval()
     # Load audio and compute mel spectrogram
         # Create a tensor with batch size 1 and sequence length 1
         seg_tensor = torch.from_numpy(seg_rgb).permute(2, 0, 1).float().unsqueeze(0).unsqueeze(0).to(device)  # Shape: (1, 1, 3, 224, 224)
         output = model(seg_tensor)
+        pred = torch.max(output, dim=1)[1].cpu().numpy()[0]
+        predicted_bird = class_names[str(pred)]  # Convert pred to string to match JSON keys
+        predictions.append(predicted_bird)
     return predictions
 # Initialize the model
 model.load_state_dict(torch.load('model_weights.pth', map_location=device))
 model.eval()
+# Prediction function for Gradio
 def predict_bird(file_path):
     predictions = infer_birdcall(model, file_path, segment_length=500, device=str(device))
+    # Format predictions as a numbered list
+    formatted_predictions = "\n".join([f"{i+1}. {pred}" for i, pred in enumerate(predictions)])
+    return formatted_predictions  # Return formatted list of predictions
+# Custom Gradio interface with additional components
 def gradio_interface(file_path):
+    # Predict bird species
     prediction = predict_bird(file_path)
     # Display the uploaded MP3 file with a play button
+    audio_player = gr.Audio(file_path, label="Uploaded MP3 File", visible=True, autoplay=True)
     # Display images with titles
     bird_species_image = gr.Image("1.jpg", label="Bird Species")
         gr.Image(label="Bird Origins")
     ]
 )
+interface.launch()