Spaces:

Shilpaj
/

ImageNet

Sleeping

App Files Files Community

Shilpaj committed on Jan 2, 2025

Commit

077fb0c

verified ·

1 Parent(s): 9280d33

Feat: Files for application

Browse files

Files changed (15) hide show

.gitattributes +1 -0
README.md +16 -13
app.py +115 -0
assets/examples/bird.jpg +0 -0
assets/examples/car.jpg +0 -0
assets/examples/cat.jpg +0 -0
assets/examples/dog.jpg +0 -0
assets/examples/frog.jpg +0 -0
assets/examples/horse.jpg +0 -0
assets/examples/plane.jpg +3 -0
assets/examples/shark-plane.jpg +0 -0
assets/examples/ship.png +0 -0
assets/examples/truck.jpg +0 -0
inference.py +102 -0
requirements.txt +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/examples/plane.jpg filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,16 @@
----
-title: ImageNet
-emoji: 📉
-colorFrom: pink
-colorTo: green
-sdk: gradio
-sdk_version: 5.9.1
-app_file: app.py
-pinned: false
-short_description: RestNet50 trained on ImageNet
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# ResNet50 trained on ImageNet-1K
+Model trained on ImageNet-1K with 1000 classes.
+## Model
+`resnet50_imagenet1k.pth`
+## Usage
+1. Download the model file named above.
+2. Use the model in your project.
+```python
+```

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+#!/usr/bin/env python
+"""
+Application for ResNet50 trained on ImageNet-1K.
+"""
+# Standard Library Imports
+import gradio as gr
+# Third Party Imports
+import torch
+from torchvision import models
+# Local Imports
+from inference import inference
+def load_model(model_path: str):
+    """
+    Load the model.
+    """
+    # Load the pre-trained ResNet50 model from ImageNet
+    model = models.resnet50(pretrained=False)
+    # Load custom weights from a .pth file
+    state_dict = torch.load(model_path)
+    # Filter out unexpected keys
+    filtered_state_dict = {k: v for k, v in state_dict['model_state_dict'].items() if k in model.state_dict()}
+    # Load the filtered state dictionary into the model
+    model.load_state_dict(filtered_state_dict, strict=False)
+    return model
+def load_classes():
+    """
+    Load the classes.
+    """
+    # Get ImageNet class names from ResNet50 weights
+    classes = models.ResNet50_Weights.IMAGENET1K_V2.meta["categories"]
+    return classes
+def main():
+    """
+    Main function for the application.
+    """
+    # Load the model at startup
+    model = load_model("resnet50_imagenet1k.pth")
+    # Load the classes at startup
+    classes = load_classes()
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """
+            # ImageNet-1K trained on ResNet50v2
+            """
+        )
+    # #############################################################################
+    # ################################ GradCam Tab ################################
+    # #############################################################################
+    with gr.Tab("GradCam"):
+        gr.Markdown(
+            """
+            Visualize Class Activations Maps generated by the model's layer for the predicted class.
+            This is used to see what the model is actually looking at in the image.
+            """
+        )
+        with gr.Row():
+            # Update the image input dimensions
+            img_input = [gr.Image(label="Input Image", type="numpy", height=224)]  # Changed dimensions
+            gradcam_outputs = [
+                gr.Label(label="Predictions"),
+                gr.Image(label="GradCAM Output", height=224)  # Match input image height
+            ]
+        with gr.Row():
+            gradcam_inputs = [
+                gr.Slider(0, 1, value=0.5, label="Activation Map Transparency"),
+                gr.Slider(1, 10, value=3, step=1, label="Number of Top Predictions"),
+                gr.Slider(1, 6, value=4, step=1, label="Target Layer Number")
+            ]
+        gradcam_button = gr.Button("Generate GradCAM")
+        # Pass model to inference function using partial
+        from functools import partial
+        inference_fn = partial(inference, model=model, classes=classes)
+        gradcam_button.click(inference_fn, inputs=img_input + gradcam_inputs, outputs=gradcam_outputs)
+        gr.Markdown("## Examples")
+        gr.Examples(
+            examples=[
+                ["./assets/examples/dog.jpg", 0.5, 3, 4],
+                ["./assets/examples/cat.jpg", 0.5, 3, 4],
+                ["./assets/examples/frog.jpg", 0.5, 3, 4],
+                ["./assets/examples/bird.jpg", 0.5, 3, 4],
+                ["./assets/examples/shark-plane.jpg", 0.5, 3, 4],
+                ["./assets/examples/car.jpg", 0.5, 3, 4],
+                ["./assets/examples/truck.jpg", 0.5, 3, 4],
+                ["./assets/examples/horse.jpg", 0.5, 3, 4],
+                ["./assets/examples/plane.jpg", 0.5, 3, 4],
+                ["./assets/examples/ship.png", 0.5, 3, 4]
+            ],
+            inputs=img_input + gradcam_inputs,
+            fn=inference_fn,
+            outputs=gradcam_outputs
+        )
+    gr.close_all()
+    demo.launch(debug=True)
+if __name__ == "__main__":
+    main()

assets/examples/bird.jpg ADDED Viewed

assets/examples/car.jpg ADDED Viewed

assets/examples/cat.jpg ADDED Viewed

assets/examples/dog.jpg ADDED Viewed

assets/examples/frog.jpg ADDED Viewed

assets/examples/horse.jpg ADDED Viewed

assets/examples/plane.jpg ADDED Viewed

Git LFS Details

SHA256: da6f28a859fa7137748904be0e7f4355f00fc66e600671c3448b9e9d7ce8f14b
Pointer size: 132 Bytes
Size of remote file: 2.52 MB

assets/examples/shark-plane.jpg ADDED Viewed

assets/examples/ship.png ADDED Viewed

assets/examples/truck.jpg ADDED Viewed

inference.py ADDED Viewed

	@@ -0,0 +1,102 @@

+#!/usr/bin/env python
+"""
+Inference script for ResNet50 trained on ImageNet-1K.
+"""
+# Standard Library Imports
+import numpy as np
+import torch
+from collections import OrderedDict
+# Third Party Imports
+from torchvision import transforms
+from torch.nn import functional as F
+from torchvision.models import resnet50
+from pytorch_grad_cam import GradCAM
+from pytorch_grad_cam.utils.image import show_cam_on_image
+from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+def inference(input_img,
+              model,
+              classes,
+              transparency=0.5,
+              number_of_top_classes=3,
+              target_layer_number=4):
+    """
+    Function to run inference on the input image
+    :param input_img: Image provided by the user
+    :param model: Model to use for inference
+    :param classes: Classes to use for inference
+    :param transparency: Percentage of cam overlap over the input image
+    :param number_of_top_classes: Number of top predictions for the input image
+    :param target_layer_number: Layer for which GradCam to be shown
+    """
+    # Save a copy of input img
+    org_img = input_img.copy()
+    # Calculate mean over each channel of input image
+    mean_r, mean_g, mean_b = np.mean(input_img[:, :, 0]/255.), np.mean(input_img[:, :, 1]/255.), np.mean(input_img[:, :, 2]/255.)
+    # Calculate Standard deviation over each channel
+    std_r, std_g, std_b = np.std(input_img[:, :, 0]/255.), np.std(input_img[:, :, 1]/255.), np.std(input_img[:, :, 2]/255.)
+    # Convert img to tensor and normalize it
+    _transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
+        ])
+    # Preprocess the input image
+    input_tensor = _transform(input_img)
+    # Create a mini-batch as expected by the model
+    input_tensor = input_tensor.unsqueeze(0)
+    # Move the input and model to GPU if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    input_tensor = input_tensor.to(device)
+    model.to(device)
+    # Get Model Predictions
+    with torch.no_grad():
+        outputs = model(input_tensor)
+        probabilities = torch.softmax(outputs, dim=1)[0]
+        del outputs
+        confidences = {classes[i]: float(probabilities[i]) for i in range(1000)}
+    # Select the top classes based on user input
+    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
+    show_confidences = OrderedDict(sorted_confidences[:number_of_top_classes])
+    # Map layer numbers to meaningful parts of the ResNet architecture
+    _layers = {
+        1: model.conv1,          # Initial convolution layer
+        2: model.layer1[-1],     # Last bottleneck of first residual block
+        3: model.layer2[-1],     # Last bottleneck of second residual block
+        4: model.layer3[-1],     # Last bottleneck of third residual block
+        5: model.layer4[-1],     # Last bottleneck of fourth residual block
+        6: model.layer4[-1]      # Changed from fc to last conv layer for better visualization
+    }
+    # Ensure valid layer selection
+    target_layer_number = min(max(target_layer_number, 1), 6)
+    target_layers = [_layers[target_layer_number]]
+    # Get the class activations from the selected layer
+    cam = GradCAM(model=model, target_layers=target_layers)
+    # Get the most probable class index
+    top_class = max(confidences.items(), key=lambda x: x[1])[0]
+    class_idx = classes.index(top_class)
+    # Generate GradCAM for the top predicted class
+    grayscale_cam = cam(input_tensor=input_tensor,
+                       targets=[ClassifierOutputTarget(class_idx)],
+                       aug_smooth=True,
+                       eigen_smooth=True)
+    model.eval()
+    grayscale_cam = grayscale_cam[0, :]
+    # Overlay input image with Class activations
+    visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=transparency)
+    return show_confidences, visualization

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio==3.38.1
+grad-cam==1.6.1
+numpy==1.25.2
+torch==2.0.1+cpu
+torchvision==0.15.2+cpu