rtik007 committed on
Commit
13692c5
·
verified ·
1 Parent(s): 3bd7f39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -46
app.py CHANGED
@@ -5,81 +5,147 @@ Neural Network (CNN) within PyTorch framework. Additionally, Gradio is used to b
5
  interface for easy image uploads and breed predictions.
6
  '''
7
 
8
- import gradio as gr
9
- import torchvision.transforms as transforms
10
- import torch.nn.functional as F
11
- from PIL import Image
 
 
 
 
12
  import numpy as np
13
  import torch
14
  import torchvision.models as models
15
- import torch.nn as nn
 
 
 
16
 
17
- # 1. Load your fine-tuned model
18
- num_breeds = 120 # Example: 120 dog breeds
19
- DOG_BREEDS = ["Chihuahua", "Japanese Spaniel", ..., "Mastiff"] # etc., in correct order
20
 
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
 
23
- # Start from VGG16 base
24
- fine_tuned_model = models.vgg16(weights="IMAGENET1K_V1")
25
- # Replace classifier (should match whatever you used during training)
26
- fine_tuned_model.classifier[-1] = nn.Linear(in_features=4096, out_features=num_breeds)
27
- fine_tuned_model.to(device)
28
-
29
- # Load the trained weights
30
- fine_tuned_model.load_state_dict(torch.load("dog_breed_vgg16.pth", map_location=device))
31
- fine_tuned_model.eval()
32
-
33
- # 2. Define transforms, including normalization
34
- in_transform = transforms.Compose([
35
- transforms.Resize((224, 224)),
36
- transforms.ToTensor(),
37
- transforms.Normalize(
38
- mean=[0.485, 0.456, 0.406], # ImageNet means
39
- std=[0.229, 0.224, 0.225] # ImageNet std
40
- )
41
- ])
 
 
 
42
 
43
  def load_convert_image_to_tensor(image):
44
- """Converts image (numpy/PIL) to a PyTorch tensor, normalized for VGG16."""
 
 
 
45
  if isinstance(image, np.ndarray):
46
  image = Image.fromarray(image.astype('uint8'), 'RGB')
47
  elif isinstance(image, str):
48
  image = Image.open(image).convert('RGB')
49
- return in_transform(image).unsqueeze(0).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def classify_image(image, confidence_threshold=0.0):
52
- """Classify the image as one of the dog breeds."""
 
 
 
 
 
 
53
  try:
54
- image_tensor = load_convert_image_to_tensor(image)
 
 
 
55
  with torch.no_grad():
56
- output = fine_tuned_model(image_tensor)
57
- softmax_output = F.softmax(output, dim=1)
58
- top_probs, top_classes = torch.topk(softmax_output, 3)
 
 
 
 
59
 
 
60
  top_probs = top_probs.cpu().numpy()[0]
61
  top_classes = top_classes.cpu().numpy()[0]
62
 
63
- result = {}
64
- for prob, cls_id in zip(top_probs, top_classes):
 
65
  if prob >= confidence_threshold:
66
- breed_label = DOG_BREEDS[cls_id]
67
- result[breed_label] = float(prob)
68
- return result if result else "No predictions above the confidence threshold."
 
 
 
 
 
69
  except Exception as e:
70
- return f"Error: {str(e)}"
71
 
72
- # Gradio interface
 
 
73
  image_input = gr.Image()
74
- confidence_slider = gr.Slider(0, 1, value=0.1, label="Confidence Threshold")
75
-
 
 
 
 
76
  label_output = gr.Label(num_top_classes=3)
 
77
  interface = gr.Interface(
78
- fn=classify_image,
79
  inputs=[image_input, confidence_slider],
80
  outputs=label_output,
81
- title="Dog Breed Classifier",
82
- description="Upload an image of a dog to see the predicted breed(s)."
83
  )
84
 
 
 
 
85
  interface.launch(share=True)
 
 
5
  interface for easy image uploads and breed predictions.
6
  '''
7
 
8
+ # -----------------------------
9
+ # INSTALL DEPENDENCIES (if needed)
10
+ # -----------------------------
11
+ # !pip install torch torchvision
12
+ # !pip install gradio
13
+ # !pip install requests
14
+ # !pip install pillow
15
+
16
  import numpy as np
17
  import torch
18
  import torchvision.models as models
19
+ import torchvision.transforms as transforms
20
+ import requests
21
+ from PIL import Image
22
+ import gradio as gr
23
 
24
# -----------------------------
# SETUP
# -----------------------------

# Prefer GPU if available; the model and every input tensor are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained VGG16 model with the ImageNet weights bundled in torchvision.
model = models.vgg16(weights="IMAGENET1K_V1").to(device)
model.eval()  # Important: set to evaluation mode (disables dropout) for inference

# Global holding the ImageNet labels once downloaded — presumably a list of
# 1000 strings whose index equals the class ID (populated by prefetch_labels();
# stays None if the download fails).
LABELS_CACHE = None
38
def prefetch_labels():
    """Download human-readable ImageNet class labels into LABELS_CACHE.

    Fetches the anishathalye/imagenet-simple-labels JSON list from GitHub
    (index-aligned with torchvision's ImageNet class IDs).  On any network,
    HTTP, or JSON-decoding failure, LABELS_CACHE is left as None and the
    error is printed; classify_image() then reports the problem to the user
    instead of crashing.
    """
    global LABELS_CACHE
    labels_map_url = (
        "https://raw.githubusercontent.com/anishathalye/"
        "imagenet-simple-labels/master/imagenet-simple-labels.json"
    )
    try:
        response = requests.get(labels_map_url, timeout=5)
        # Treat HTTP errors (404, 500, ...) as fetch failures rather than
        # feeding an error page to the JSON decoder.
        response.raise_for_status()
        LABELS_CACHE = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON
        # (requests' JSONDecodeError subclasses it); the original code
        # only caught RequestException, so a bad body crashed at startup.
        LABELS_CACHE = None
        print(f"Error fetching labels: {e}")

# Fetch labels once when the script starts so the UI is ready immediately.
prefetch_labels()
53
 
54
# Standard ImageNet preprocessing, built ONCE at import time — the original
# reconstructed this Compose pipeline on every single call.
_IMAGENET_PREPROCESS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet channel means
        std=[0.229, 0.224, 0.225],   # ImageNet channel stds
    ),
])

def load_convert_image_to_tensor(image):
    """Convert a Gradio-supplied image into a normalized input tensor.

    Parameters
    ----------
    image : numpy.ndarray | str | PIL.Image.Image
        The image as delivered by gr.Image (numpy array), a file path,
        or an already-open PIL image (passed through unchanged).

    Returns
    -------
    torch.Tensor
        A batched (unsqueeze(0)) ImageNet-normalized tensor on the
        module-level ``device``.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'), 'RGB')
    elif isinstance(image, str):
        image = Image.open(image).convert('RGB')
    return _IMAGENET_PREPROCESS(image).unsqueeze(0).to(device)
76
+
77
def get_human_readable_label_for_class_id(class_id):
    """Map an ImageNet class ID (0-999) to its human-readable label.

    Returns an "Unknown class ID: ..." string when the labels were never
    downloaded or the ID is out of range.  The range check now also rejects
    negative IDs, which previously slipped through and silently indexed
    from the END of the label list via Python's negative indexing.
    """
    if LABELS_CACHE is None or not 0 <= class_id < len(LABELS_CACHE):
        return f"Unknown class ID: {class_id}"
    return LABELS_CACHE[class_id]
85
 
86
def classify_image(image, confidence_threshold=0.0):
    """Classify *image* with the pretrained VGG16 into ImageNet classes.

    Parameters
    ----------
    image : numpy.ndarray | str
        Image from the Gradio input component (array or file path).
    confidence_threshold : float
        Minimum softmax probability a prediction must reach to be shown.

    Returns
    -------
    dict | str
        A {label: probability} dict of the top-3 predictions clearing the
        threshold (consumed by gr.Label), or a message string when labels
        are unavailable, nothing clears the threshold, or an error occurs.
    """
    # Bail out early if the startup label download failed.
    if LABELS_CACHE is None:
        return "Error: ImageNet labels not loaded."

    try:
        # Preprocess into a normalized, batched tensor on the right device.
        tensor = load_convert_image_to_tensor(image)

        # Inference only — no gradients needed.
        with torch.no_grad():
            logits = model(tensor)

        # Softmax over the class axis, then keep the three best classes.
        scores = torch.nn.functional.softmax(logits, dim=1)
        best_probs, best_classes = torch.topk(scores, 3)

        # Drop the batch dimension and leave the GPU for plain iteration.
        best_probs = best_probs.cpu().numpy()[0]
        best_classes = best_classes.cpu().numpy()[0]

        # Keep only predictions that clear the confidence threshold.
        predictions = {
            get_human_readable_label_for_class_id(int(cid)): float(p)
            for p, cid in zip(best_probs, best_classes)
            if p >= confidence_threshold
        }
        if predictions:
            return predictions
        return "No predictions above the confidence threshold."
    except Exception as e:
        # Surface any failure as a message rather than crashing the UI.
        return f"Error during classification: {str(e)}"
126
 
127
# -----------------------------
# BUILD THE GRADIO INTERFACE
# -----------------------------
# One image input, a threshold slider, and a label widget showing the
# top-3 predictions, all wired into a single Interface.
image_input = gr.Image()
confidence_slider = gr.Slider(
    minimum=0.0, maximum=1.0, value=0.0, label="Confidence Threshold"
)
label_output = gr.Label(num_top_classes=3)

interface = gr.Interface(
    fn=classify_image,
    inputs=[image_input, confidence_slider],
    outputs=label_output,
    title="VGG16 ImageNet Classifier",
    description=(
        "Upload an image to see the top ImageNet predictions "
        "from a pretrained VGG16 model."
    ),
)

# -----------------------------
# LAUNCH THE APP
# -----------------------------
# share=True additionally exposes a temporary public Gradio link.
interface.launch(share=True)