Spaces:

Shilpaj
/

ImageNet

Sleeping

App Files Files Community

Shilpaj committed on Jan 3, 2025

Commit

f8ecba6

1 Parent(s): fdcadea

Refactor: Modifications for inference on GPU

Browse files

Files changed (3) hide show

app.py +104 -27
inference.py +78 -75
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -17,11 +17,16 @@ def load_model(model_path: str):
     """
     Load the model.
     """
-    # Load the pre-trained ResNet50 model using the new weights parameter
     model = models.resnet50(weights=None)
-    # Load custom weights from a .pth file with CPU mapping
-    state_dict = torch.load(model_path)
     # Filter out unexpected keys
     filtered_state_dict = {k: v for k, v in state_dict['model_state_dict'].items() if k in model.state_dict()}
@@ -42,10 +47,35 @@ def load_classes():
     return classes
 def main():
     """
     Main function for the application.
     """
     # Load the model at startup
     model = load_model("resnet50_imagenet1k.pth")
@@ -63,25 +93,51 @@ def main():
             gr.Markdown(
                 """
                 Visualize Class Activations Maps generated by the model's layer for the predicted class.
-                This is used to see what the model is actually looking at in the image.
                 """
             )
             with gr.Row():
-                img_input = gr.Image(label="Input Image", type="numpy", height=224)
                 with gr.Column():
                     label_output = gr.Label(label="Predictions")
-                    gradcam_output = gr.Image(label="GradCAM Output", height=224)
             with gr.Row():
-                alpha_slider = gr.Slider(0, 1, value=0.5, label="Activation Map Transparency")
-                top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Number of Top Predictions")
-                target_layer_slider = gr.Slider(1, 6, value=4, step=1, label="Target Layer Number")
             gradcam_button = gr.Button("Generate GradCAM")
-            def inference_wrapper(image, alpha, top_k, target_layer):
-                return inference(image, alpha, top_k, target_layer, model=model, classes=classes)
             gradcam_button.click(
                 fn=inference_wrapper,
                 inputs=[
@@ -90,30 +146,51 @@ def main():
                     top_k_slider,
                     target_layer_slider
                 ],
-                outputs=[label_output, gradcam_output]
             )
             gr.Examples(
                 examples=[
-                    ["./assets/examples/dog.jpg", 0.5, 3, 4],
-                    ["./assets/examples/cat.jpg", 0.5, 3, 4],
-                    ["./assets/examples/frog.jpg", 0.5, 3, 4],
-                    ["./assets/examples/bird.jpg", 0.5, 3, 4],
-                    ["./assets/examples/shark-plane.jpg", 0.5, 3, 4],
-                    ["./assets/examples/car.jpg", 0.5, 3, 4],
-                    ["./assets/examples/truck.jpg", 0.5, 3, 4],
-                    ["./assets/examples/horse.jpg", 0.5, 3, 4],
-                    ["./assets/examples/plane.jpg", 0.5, 3, 4],
-                    ["./assets/examples/ship.png", 0.5, 3, 4]
                 ],
-                inputs=[img_input, alpha_slider, top_k_slider, target_layer_slider],
-                outputs=[label_output, gradcam_output],
                 fn=inference_wrapper,
-                cache_examples=True
             )
         # Launch the demo
-        demo.launch(server_name="0.0.0.0", debug=True)
 if __name__ == "__main__":

     """
     Load the model.
     """
+    # Check if CUDA is available and set device
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Using device: {device}")
+    # Load the pre-trained ResNet50 model
     model = models.resnet50(weights=None)
+    model = model.to(device)
+    # Load custom weights from a .pth file
+    state_dict = torch.load(model_path, map_location=device)
     # Filter out unexpected keys
     filtered_state_dict = {k: v for k, v in state_dict['model_state_dict'].items() if k in model.state_dict()}
     return classes
+def inference_wrapper(image, alpha, top_k, target_layer):
+    """
+    Wrapper function for inference with error handling
+    :param image: Input image; `None` when the user has not provided one
+    :param alpha: Forwarded unchanged to `inference`
+    :param top_k: Forwarded unchanged to `inference`
+    :param target_layer: Forwarded unchanged to `inference`
+    :return: `(None, None)` when there is no image, otherwise the result of `inference`
+    :raises gr.Error: If inference fails, so the failure is reported instead of swallowed
+    """
+    # Nothing to process yet: hand back one empty value per output component
+    if image is None:
+        return None, None
+    try:
+        # Mixed precision is only meaningful on CUDA, so disable it elsewhere.
+        # Gradients are deliberately left enabled: `inference` runs GradCAM,
+        # which needs a backward pass and cannot work under torch.no_grad().
+        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
+            return inference(
+                image,
+                alpha,
+                top_k,
+                target_layer,
+                model=model,
+                classes=classes
+            )
+    except Exception as e:
+        print(f"Error in inference: {str(e)}")
+        # gr.Error is an exception type: it has to be raised, not returned,
+        # otherwise the caller receives a single object for two outputs
+        raise gr.Error(f"Error processing image: {str(e)}") from e
 def main():
     """
     Main function for the application.
     """
+    global model, classes  # Make these global so they're accessible to inference_wrapper
     # Load the model at startup
     model = load_model("resnet50_imagenet1k.pth")
             gr.Markdown(
                 """
                 Visualize Class Activations Maps generated by the model's layer for the predicted class.
                 """
             )
+            # Define inputs
             with gr.Row():
+                img_input = gr.Image(
+                    label="Input Image",
+                    type="numpy",
+                    height=224,
+                    width=224
+                )
                 with gr.Column():
                     label_output = gr.Label(label="Predictions")
+                    gradcam_output = gr.Image(
+                        label="GradCAM Output",
+                        height=224,
+                        width=224
+                    )
             with gr.Row():
+                alpha_slider = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0.5,
+                    step=0.1,
+                    label="Activation Map Transparency"
+                )
+                top_k_slider = gr.Slider(
+                    minimum=1,
+                    maximum=10,
+                    value=3,
+                    step=1,
+                    label="Number of Top Predictions"
+                )
+                target_layer_slider = gr.Slider(
+                    minimum=1,
+                    maximum=6,
+                    value=4,
+                    step=1,
+                    label="Target Layer Number"
+                )
             gradcam_button = gr.Button("Generate GradCAM")
+            # Set up the click event
             gradcam_button.click(
                 fn=inference_wrapper,
                 inputs=[
                     top_k_slider,
                     target_layer_slider
                 ],
+                outputs=[
+                    label_output,
+                    gradcam_output
+                ]
             )
+            # Example section
             gr.Examples(
                 examples=[
+                    ["assets/examples/dog.jpg", 0.5, 3, 4],
+                    ["assets/examples/cat.jpg", 0.5, 3, 4],
+                    ["assets/examples/frog.jpg", 0.5, 3, 4],
+                    ["assets/examples/bird.jpg", 0.5, 3, 4],
+                    ["assets/examples/shark-plane.jpg", 0.5, 3, 4],
+                    ["assets/examples/car.jpg", 0.5, 3, 4],
+                    ["assets/examples/truck.jpg", 0.5, 3, 4],
+                    ["assets/examples/horse.jpg", 0.5, 3, 4],
+                    ["assets/examples/plane.jpg", 0.5, 3, 4],
+                    ["assets/examples/ship.png", 0.5, 3, 4]
+                ],
+                inputs=[
+                    img_input,
+                    alpha_slider,
+                    top_k_slider,
+                    target_layer_slider
+                ],
+                outputs=[
+                    label_output,
+                    gradcam_output
                 ],
                 fn=inference_wrapper,
+                cache_examples=True,
+                label="Click on any example to run GradCAM"
             )
         # Launch the demo
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            debug=True,
+            enable_queue=True,
+            show_error=True,
+            max_threads=4
+        )
 if __name__ == "__main__":

inference.py CHANGED Viewed

@@ -20,80 +20,83 @@ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 @spaces.GPU
 def inference(image, alpha, top_k, target_layer, model=None, classes=None):
     """
-    Function to run inference on the input image
-    :param image: Image provided by the user
-    :param alpha: Percentage of cam overlap over the input image
-    :param top_k: Number of top predictions for the input image
-    :param target_layer: Layer for which GradCam to be shown
-    :param model: Model to use for inference
-    :param classes: Classes to use for inference
     """
-    # Save a copy of input img
-    org_img = image.copy()
-    # Calculate mean over each channel of input image
-    mean_r, mean_g, mean_b = np.mean(image[:, :, 0]/255.), np.mean(image[:, :, 1]/255.), np.mean(image[:, :, 2]/255.)
-    # Calculate Standard deviation over each channel
-    std_r, std_g, std_b = np.std(image[:, :, 0]/255.), np.std(image[:, :, 1]/255.), np.std(image[:, :, 2]/255.)
-    # Convert img to tensor and normalize it
-    _transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
-        ])
-    # Preprocess the input image
-    input_tensor = _transform(image)
-    # Create a mini-batch as expected by the model
-    input_tensor = input_tensor.unsqueeze(0)
-    # Move the input and model to GPU if available
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    input_tensor = input_tensor.to(device)
-    model.to(device)
-    # Get Model Predictions
-    with torch.no_grad():
-        outputs = model(input_tensor)
-        probabilities = torch.softmax(outputs, dim=1)[0]
-        del outputs
-        confidences = {classes[i]: float(probabilities[i]) for i in range(1000)}
-    # Select the top classes based on user input
-    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
-    show_confidences = OrderedDict(sorted_confidences[:top_k])
-    # Map layer numbers to meaningful parts of the ResNet architecture
-    _layers = {
-        1: model.conv1,          # Initial convolution layer
-        2: model.layer1[-1],     # Last bottleneck of first residual block
-        3: model.layer2[-1],     # Last bottleneck of second residual block
-        4: model.layer3[-1],     # Last bottleneck of third residual block
-        5: model.layer4[-1],     # Last bottleneck of fourth residual block
-        6: model.layer4[-1]      # Changed from fc to last conv layer for better visualization
-    }
-    # Ensure valid layer selection
-    target_layer = min(max(target_layer, 1), 6)
-    target_layers = [_layers[target_layer]]
-    # Get the class activations from the selected layer
-    cam = GradCAM(model=model, target_layers=target_layers)
-    # Get the most probable class index
-    top_class = max(confidences.items(), key=lambda x: x[1])[0]
-    class_idx = classes.index(top_class)
-    # Generate GradCAM for the top predicted class
-    grayscale_cam = cam(input_tensor=input_tensor,
-                       targets=[ClassifierOutputTarget(class_idx)],
-                       aug_smooth=True,
-                       eigen_smooth=True)
     model.eval()
-    grayscale_cam = grayscale_cam[0, :]
-    # Overlay input image with Class activations
-    visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
-    return show_confidences, visualization

 @spaces.GPU
 def inference(image, alpha, top_k, target_layer, model=None, classes=None):
     """
+    Run inference with GradCAM visualization
+    :param image: Image provided by the user (indexed below as a height x width x channel array)
+    :param alpha: Weight of the input image in the overlay (passed on as `image_weight`)
+    :param top_k: Number of top predictions to return
+    :param target_layer: Layer number (clamped to 1-6) for which GradCAM is shown
+    :param model: Model to use for inference
+    :param classes: Class names, indexed by model output position
+    :return: Tuple of (top-k class confidences, image overlaid with the class activation map)
+    :raises ValueError: If `model` or `classes` is not supplied
     """
+    if model is None or classes is None:
+        raise ValueError("inference() requires both a model and a list of classes")
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # Ensure model is on correct device and in eval mode
+    model = model.to(device)
     model.eval()
+    # UI sliders may deliver floats; slicing and layer lookup want plain ints
+    top_k = int(top_k)
+    # Ensure valid layer selection
+    target_layer = int(min(max(target_layer, 1), 6))
+    # Scale the first three channels to [0, 1]; reused for stats and the overlay
+    scaled_img = image[:, :, :3] / 255.
+    # Calculate mean and standard deviation over each channel of input image
+    channel_mean = scaled_img.mean(axis=(0, 1)).tolist()
+    # A single-colour image has zero std, which Normalize rejects, so clamp it
+    channel_std = np.maximum(scaled_img.std(axis=(0, 1)), 1e-6).tolist()
+    # Convert img to tensor and normalize it
+    _transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(channel_mean, channel_std)
+        ])
+    # Preprocess the input image, create a mini-batch as expected by the model
+    # and move it to the same device as the model (otherwise the forward pass
+    # fails with a device mismatch on GPU)
+    input_tensor = _transform(image).unsqueeze(0).to(device)
+    # Get Model Predictions; the plain forward pass does not need gradients
+    with torch.no_grad():
+        with torch.cuda.amp.autocast(enabled=device.type == 'cuda'):
+            outputs = model(input_tensor)
+        probabilities = torch.softmax(outputs.float(), dim=1)[0]
+        del outputs
+    # One confidence per model output instead of a hard-coded class count
+    confidences = {classes[i]: score for i, score in enumerate(probabilities.tolist())}
+    # Select the top classes based on user input
+    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
+    show_confidences = OrderedDict(sorted_confidences[:top_k])
+    # Map layer numbers to meaningful parts of the ResNet architecture
+    _layers = {
+        1: model.conv1,          # Initial convolution layer
+        2: model.layer1[-1],     # Last bottleneck of first residual block
+        3: model.layer2[-1],     # Last bottleneck of second residual block
+        4: model.layer3[-1],     # Last bottleneck of third residual block
+        5: model.layer4[-1],     # Last bottleneck of fourth residual block
+        6: model.layer4[-1]      # Changed from fc to last conv layer for better visualization
+    }
+    target_layers = [_layers[target_layer]]
+    # Get the most probable class index
+    class_idx = int(torch.argmax(probabilities))
+    # GradCAM backpropagates from the class score to the target layer, so it
+    # must run with autograd enabled even if a caller wrapped us in no_grad().
+    # Autocast is switched off here to keep activations in full precision
+    # NOTE(review): presumably safer for the NumPy-based smoothing - confirm
+    with torch.enable_grad(), torch.cuda.amp.autocast(enabled=False):
+        # The context manager releases the hooks GradCAM registers on the model
+        with GradCAM(model=model, target_layers=target_layers) as cam:
+            # Generate GradCAM for the top predicted class
+            grayscale_cam = cam(input_tensor=input_tensor,
+                                targets=[ClassifierOutputTarget(class_idx)],
+                                aug_smooth=True,
+                                eigen_smooth=True)
+    grayscale_cam = grayscale_cam[0, :]
+    # Overlay input image with Class activations
+    visualization = show_cam_on_image(scaled_img, grayscale_cam, use_rgb=True, image_weight=alpha)
+    return show_confidences, visualization

requirements.txt CHANGED Viewed

@@ -3,3 +3,4 @@ grad-cam
 numpy<2.0.0
 torch==2.0.1
 torchvision==0.15.2

 numpy<2.0.0
 torch==2.0.1
 torchvision==0.15.2
+Pillow>=9.0.0