Spaces:

rtik007
/

DogBreadDetector

Sleeping

App Files Files Community

rtik007 committed on Jan 24, 2025

Commit

3bd7f39

verified ·

1 Parent(s): 2060a55

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -63

app.py CHANGED Viewed

@@ -5,89 +5,81 @@ Neural Network (CNN) within PyTorch framework. Additionally, Gradio is used to b
 interface for easy image uploads and breed predictions.
 '''
-#!pip install torch torchvision
-#!pip install matplotlib
-#!pip install gradio
 import numpy as np
 import torch
 import torchvision.models as models
-from PIL import Image
-import torchvision.transforms as transforms
-import requests
-import gradio as gr
-import os
-# Load pretrained VGG16 model
-VGG16 = models.vgg16(weights="IMAGENET1K_V1")
-use_cuda = torch.cuda.is_available()
-if use_cuda:
-    VGG16 = VGG16.cuda()
-# Global cache for labels
-LABELS_CACHE = None
-def prefetch_labels():
-    global LABELS_CACHE
-    LABELS_MAP_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
-    try:
-        LABELS_CACHE = requests.get(LABELS_MAP_URL, timeout=5).json()
-    except requests.exceptions.RequestException as e:
-        LABELS_CACHE = None
-        print(f"Error fetching labels: {e}")
-# Fetch labels on startup
-prefetch_labels()
 def load_convert_image_to_tensor(image):
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image.astype('uint8'), 'RGB')
     elif isinstance(image, str):
         image = Image.open(image).convert('RGB')
-    in_transform = transforms.Compose([
-        transforms.Resize(size=(224, 224)),
-        transforms.ToTensor()
-    ])
-    image = in_transform(image)[:3, :, :].unsqueeze(0)
-    return image
-def get_human_readable_label_for_class_id(class_id, labels_cache=None):
-    if labels_cache is None or class_id >= len(labels_cache):
-        return f"Unknown class ID: {class_id}"
-    return labels_cache[class_id]
 def classify_image(image, confidence_threshold=0.0):
-    global LABELS_CACHE
-    if LABELS_CACHE is None:
-        return "Error: Labels not loaded"
     try:
         image_tensor = load_convert_image_to_tensor(image)
-        if use_cuda:
-            image_tensor = image_tensor.cuda()
-        output = VGG16(image_tensor)
-        softmax_output = torch.nn.functional.softmax(output, dim=1)
-        top_probs, top_classes = torch.topk(softmax_output, 3)
-        top_probs = top_probs.cpu().detach().numpy() if use_cuda else top_probs.detach().numpy()
-        top_classes = top_classes.cpu().detach().numpy() if use_cuda else top_classes.detach().numpy()
         result = {}
-        for prob, cls_id in zip(top_probs[0], top_classes[0]):
             if prob >= confidence_threshold:
-                label = get_human_readable_label_for_class_id(int(cls_id), LABELS_CACHE)
-                result[label] = prob
         return result if result else "No predictions above the confidence threshold."
     except Exception as e:
         return f"Error: {str(e)}"
-# Gradio Interface
 image_input = gr.Image()
-confidence_slider = gr.Slider(0, 1, 0.0, label="Confidence Threshold (Optional)")  # Changed this line
-label_output = gr.Label(num_top_classes=3)
-interface = gr.Interface(fn=classify_image, inputs=[image_input, confidence_slider], outputs=label_output)
-# Launch Gradio with shareable link
-interface.launch(share=True)

 interface for easy image uploads and breed predictions.
 '''
+import gradio as gr
+import torchvision.transforms as transforms
+import torch.nn.functional as F
+from PIL import Image
 import numpy as np
 import torch
 import torchvision.models as models
+import torch.nn as nn
+# 1. Load your fine-tuned model
+num_breeds = 120  # Example: 120 dog breeds
+DOG_BREEDS = ["Chihuahua", "Japanese Spaniel", ..., "Mastiff"]  # placeholder: replace "..." with the remaining breed names, in the same order as the training labels
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Start from VGG16 base
+fine_tuned_model = models.vgg16(weights="IMAGENET1K_V1")
+# Replace classifier (should match whatever you used during training)
+fine_tuned_model.classifier[-1] = nn.Linear(in_features=4096, out_features=num_breeds)
+fine_tuned_model.to(device)
+# Load the trained weights
+fine_tuned_model.load_state_dict(torch.load("dog_breed_vgg16.pth", map_location=device))
+fine_tuned_model.eval()
+# 2. Define transforms, including normalization
+in_transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(
+        mean=[0.485, 0.456, 0.406],  # ImageNet means
+        std=[0.229, 0.224, 0.225]    # ImageNet std
+    )
+])
 def load_convert_image_to_tensor(image):
+    """Converts image (numpy/PIL) to a PyTorch tensor, normalized for VGG16."""
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image.astype('uint8'), 'RGB')
     elif isinstance(image, str):
         image = Image.open(image).convert('RGB')
+    return in_transform(image).unsqueeze(0).to(device)
 def classify_image(image, confidence_threshold=0.0):
+    """Classify the image as one of the dog breeds."""
     try:
         image_tensor = load_convert_image_to_tensor(image)
+        with torch.no_grad():
+            output = fine_tuned_model(image_tensor)
+            softmax_output = F.softmax(output, dim=1)
+            top_probs, top_classes = torch.topk(softmax_output, 3)
+        top_probs = top_probs.cpu().numpy()[0]
+        top_classes = top_classes.cpu().numpy()[0]
         result = {}
+        for prob, cls_id in zip(top_probs, top_classes):
             if prob >= confidence_threshold:
+                breed_label = DOG_BREEDS[cls_id]
+                result[breed_label] = float(prob)
         return result if result else "No predictions above the confidence threshold."
     except Exception as e:
         return f"Error: {str(e)}"
+# Gradio interface
 image_input = gr.Image()
+confidence_slider = gr.Slider(0, 1, value=0.1, label="Confidence Threshold")
+label_output = gr.Label(num_top_classes=3)
+interface = gr.Interface(
+    fn=classify_image,
+    inputs=[image_input, confidence_slider],
+    outputs=label_output,
+    title="Dog Breed Classifier",
+    description="Upload an image of a dog to see the predicted breed(s)."
+)
+interface.launch(share=True)