Shilpaj committed on
Commit
b67331d
·
1 Parent(s): ad93eca

Fix: Runtime error

Browse files
Files changed (2) hide show
  1. app.py +124 -113
  2. inference.py +61 -53
app.py CHANGED
@@ -53,143 +53,154 @@ def inference_wrapper(image, alpha, top_k, target_layer):
53
  """
54
  try:
55
  if image is None:
56
- return None, None
57
 
58
- with torch.cuda.amp.autocast(): # Enable automatic mixed precision
59
- with torch.no_grad(): # Disable gradient calculation
60
- return inference(
61
- image,
62
- alpha,
63
- top_k,
64
- target_layer,
65
- model=model,
66
- classes=classes
67
- )
 
 
 
 
68
  except Exception as e:
69
  print(f"Error in inference: {str(e)}")
70
- return None, None
71
 
72
 
73
def main():
    """
    Main function for the application.

    Loads the ResNet50 model and the class labels, builds the Gradio Blocks
    UI for GradCAM visualization, and launches the server. ``model`` and
    ``classes`` are module globals so that ``inference_wrapper`` (the click
    callback) can access them.
    """
    global model, classes  # Make these global so they're accessible to inference_wrapper

    # Load the model at startup
    model = load_model("resnet50_imagenet1k.pth")

    # Load the classes at startup
    classes = load_classes()

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # ImageNet-1K trained on ResNet50v2
            """
        )

        with gr.Tab("GradCam"):
            gr.Markdown(
                """
                Visualize Class Activations Maps generated by the model's layer for the predicted class.
                """
            )

            # Define inputs
            with gr.Row():
                img_input = gr.Image(
                    label="Input Image",
                    type="numpy",
                    height=224,
                    width=224
                )
                with gr.Column():
                    label_output = gr.Label(label="Predictions")
                    gradcam_output = gr.Image(
                        label="GradCAM Output",
                        height=224,
                        width=224
                    )

            with gr.Row():
                alpha_slider = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.5,
                    step=0.1,
                    label="Activation Map Transparency"
                )
                top_k_slider = gr.Slider(
                    minimum=1,
                    maximum=10,
                    value=3,
                    step=1,
                    label="Number of Top Predictions"
                )
                target_layer_slider = gr.Slider(
                    minimum=1,
                    maximum=6,
                    value=4,
                    step=1,
                    label="Target Layer Number"
                )

            gradcam_button = gr.Button("Generate GradCAM")

            # Set up the click event
            gradcam_button.click(
                fn=inference_wrapper,
                inputs=[
                    img_input,
                    alpha_slider,
                    top_k_slider,
                    target_layer_slider
                ],
                outputs=[
                    label_output,
                    gradcam_output
                ]
            )

            # Example section.
            # BUG FIX: cache_examples=True runs the full GradCAM pipeline on
            # all ten example images while the app is still starting up —
            # heavy startup work that can fail before the UI is even served.
            # Run examples on demand instead.
            gr.Examples(
                examples=[
                    ["assets/examples/dog.jpg", 0.5, 3, 4],
                    ["assets/examples/cat.jpg", 0.5, 3, 4],
                    ["assets/examples/frog.jpg", 0.5, 3, 4],
                    ["assets/examples/bird.jpg", 0.5, 3, 4],
                    ["assets/examples/shark-plane.jpg", 0.5, 3, 4],
                    ["assets/examples/car.jpg", 0.5, 3, 4],
                    ["assets/examples/truck.jpg", 0.5, 3, 4],
                    ["assets/examples/horse.jpg", 0.5, 3, 4],
                    ["assets/examples/plane.jpg", 0.5, 3, 4],
                    ["assets/examples/ship.png", 0.5, 3, 4]
                ],
                inputs=[
                    img_input,
                    alpha_slider,
                    top_k_slider,
                    target_layer_slider
                ],
                outputs=[
                    label_output,
                    gradcam_output
                ],
                fn=inference_wrapper,
                cache_examples=False,
                label="Click on any example to run GradCAM"
            )

    # Launch the demo
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True,
        show_error=True,
        max_threads=4
    )
194
 
195
  if __name__ == "__main__":
 
def inference_wrapper(image, alpha, top_k, target_layer):
    """
    Guarded bridge between the Gradio UI and ``inference``.

    Parameters:
        image: numpy image from the Gradio input, or None when nothing
            was uploaded.
        alpha: activation-map transparency, forwarded to ``inference``.
        top_k: number of top predictions to display.
        target_layer: index of the model layer to visualize.

    Returns:
        ``(confidences, visualization)`` from ``inference`` on success,
        otherwise ``({"error": message}, None)``.
    """
    # Guard clause: nothing to process without an input image.
    if image is None:
        return {"error": "No image provided"}, None

    try:
        results = inference(
            image,
            alpha,
            top_k,
            target_layer,
            model=model,
            classes=classes
        )
    except Exception as e:
        print(f"Error in inference: {str(e)}")
        return {"error": str(e)}, None

    # ``inference`` signals failure with None; surface that to the UI.
    if results is None:
        return {"error": "Processing failed"}, None
    return results
76
 
77
def main():
    """
    Main function for the application.

    Loads the ResNet50 model and the class labels, builds the Gradio Blocks
    UI for GradCAM visualization, and launches the server. ``model`` and
    ``classes`` are module globals so that ``inference_wrapper`` (the click
    callback) can access them. Any startup failure is logged and the CUDA
    cache is released.
    """
    global model, classes

    try:
        # Load the model at startup
        model = load_model("resnet50_imagenet1k.pth")

        # Load the classes at startup
        classes = load_classes()

        with gr.Blocks() as demo:
            gr.Markdown(
                """
                # ImageNet-1K trained on ResNet50v2
                """
            )

            with gr.Tab("GradCam"):
                gr.Markdown(
                    """
                    Visualize Class Activations Maps generated by the model's layer for the predicted class.
                    """
                )

                # Define inputs
                with gr.Row():
                    img_input = gr.Image(
                        label="Input Image",
                        type="numpy",
                        height=224,
                        width=224
                    )
                    with gr.Column():
                        label_output = gr.Label(label="Predictions")
                        gradcam_output = gr.Image(
                            label="GradCAM Output",
                            height=224,
                            width=224
                        )

                with gr.Row():
                    alpha_slider = gr.Slider(
                        minimum=0,
                        maximum=1,
                        value=0.5,
                        step=0.1,
                        label="Activation Map Transparency"
                    )
                    top_k_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Number of Top Predictions"
                    )
                    target_layer_slider = gr.Slider(
                        minimum=1,
                        maximum=6,
                        value=4,
                        step=1,
                        label="Target Layer Number"
                    )

                gradcam_button = gr.Button("Generate GradCAM")

                # Set up the click event
                gradcam_button.click(
                    fn=inference_wrapper,
                    inputs=[
                        img_input,
                        alpha_slider,
                        top_k_slider,
                        target_layer_slider
                    ],
                    outputs=[
                        label_output,
                        gradcam_output
                    ]
                )

                # Example section.
                # BUG FIX: the launch() call below claimed to "Disable example
                # caching", yet cache_examples=True here still ran the full
                # GradCAM pipeline on all ten examples at startup. Caching is
                # disabled at its actual source so examples run on demand.
                gr.Examples(
                    examples=[
                        ["assets/examples/dog.jpg", 0.5, 3, 4],
                        ["assets/examples/cat.jpg", 0.5, 3, 4],
                        ["assets/examples/frog.jpg", 0.5, 3, 4],
                        ["assets/examples/bird.jpg", 0.5, 3, 4],
                        ["assets/examples/shark-plane.jpg", 0.5, 3, 4],
                        ["assets/examples/car.jpg", 0.5, 3, 4],
                        ["assets/examples/truck.jpg", 0.5, 3, 4],
                        ["assets/examples/horse.jpg", 0.5, 3, 4],
                        ["assets/examples/plane.jpg", 0.5, 3, 4],
                        ["assets/examples/ship.png", 0.5, 3, 4]
                    ],
                    inputs=[
                        img_input,
                        alpha_slider,
                        top_k_slider,
                        target_layer_slider
                    ],
                    outputs=[
                        label_output,
                        gradcam_output
                    ],
                    fn=inference_wrapper,
                    cache_examples=False,
                    label="Click on any example to run GradCAM"
                )

        # BUG FIX: ``cache_examples`` is not a ``launch()`` parameter (it
        # belongs to gr.Examples) and ``enable_queue`` is deprecated/removed
        # in current Gradio — passing either raises TypeError at startup.
        # Queuing (serializing requests to bound memory use) is configured
        # via demo.queue() instead.
        demo.queue(max_size=16)

        # Launch the demo with reduced memory usage
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            debug=True,
            show_error=True,
            max_threads=1  # Reduce concurrent processing
        )
    except Exception as e:
        print(f"Error during startup: {str(e)}")
        # Release any GPU memory held by a partially-initialized model.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
206
  if __name__ == "__main__":
inference.py CHANGED
@@ -22,31 +22,30 @@ def inference(image, alpha, top_k, target_layer, model=None, classes=None):
22
  """
23
  Run inference with GradCAM visualization
24
  """
25
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
26
-
27
- # Ensure model is on correct device and in eval mode
28
- model = model.to(device)
29
- model.eval()
30
-
31
- # Save a copy of input img
32
- org_img = image.copy()
 
33
 
34
- # Calculate mean and std over each channel
35
- mean_r, mean_g, mean_b = np.mean(image[:, :, 0]/255.), np.mean(image[:, :, 1]/255.), np.mean(image[:, :, 2]/255.)
36
- std_r, std_g, std_b = np.std(image[:, :, 0]/255.), np.std(image[:, :, 1]/255.), np.std(image[:, :, 2]/255.)
 
 
 
 
 
37
 
38
- # Convert img to tensor and normalize it
39
- _transform = transforms.Compose([
40
- transforms.ToTensor(),
41
- transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
42
- ])
43
-
44
- # Preprocess the input image and move to device
45
- input_tensor = _transform(image).to(device)
46
- input_tensor = input_tensor.unsqueeze(0)
47
-
48
- # For predictions, we don't need gradients
49
- with torch.no_grad():
50
  # Get Model Predictions
51
  outputs = model(input_tensor)
52
  probabilities = torch.softmax(outputs, dim=1)[0]
@@ -56,37 +55,46 @@ def inference(image, alpha, top_k, target_layer, model=None, classes=None):
56
  sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
57
  show_confidences = OrderedDict(sorted_confidences[:top_k])
58
 
59
- # Map layer numbers to meaningful parts of the ResNet architecture
60
- _layers = {
61
- 1: model.conv1, # Initial convolution layer
62
- 2: model.layer1[-1], # Last bottleneck of first residual block
63
- 3: model.layer2[-1], # Last bottleneck of second residual block
64
- 4: model.layer3[-1], # Last bottleneck of third residual block
65
- 5: model.layer4[-1], # Last bottleneck of fourth residual block
66
- 6: model.layer4[-1] # Changed from fc to last conv layer for better visualization
67
- }
68
-
69
- # Ensure valid layer selection
70
- target_layer = min(max(target_layer, 1), 6)
71
- target_layers = [_layers[target_layer]]
72
 
73
- # Get the class activations from the selected layer
74
- cam = GradCAM(model=model, target_layers=target_layers)
 
75
 
76
- # Get the most probable class index
77
- top_class = max(confidences.items(), key=lambda x: x[1])[0]
78
- class_idx = classes.index(top_class)
79
 
80
- # Enable gradients for GradCAM computation
81
- input_tensor.requires_grad = True
82
-
83
- # Generate GradCAM for the top predicted class
84
- grayscale_cam = cam(input_tensor=input_tensor,
85
- targets=[ClassifierOutputTarget(class_idx)],
86
- aug_smooth=True,
87
- eigen_smooth=True)
88
- grayscale_cam = grayscale_cam[0, :]
 
 
 
89
 
90
- # Overlay input image with Class activations
91
- visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
92
- return show_confidences, visualization
 
 
 
 
 
 
 
 
 
 
 
22
  """
23
  Run inference with GradCAM visualization
24
  """
25
+ try:
26
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
27
+
28
+ # Ensure model is on correct device and in eval mode
29
+ model = model.to(device)
30
+ model.eval()
31
+
32
+ # Save a copy of input img
33
+ org_img = image.copy()
34
 
35
+ # Convert img to tensor and normalize it
36
+ _transform = transforms.Compose([
37
+ transforms.ToTensor(),
38
+ transforms.Normalize(
39
+ mean=[0.485, 0.456, 0.406],
40
+ std=[0.229, 0.224, 0.225]
41
+ )
42
+ ])
43
 
44
+ # Preprocess the input image and move to device
45
+ input_tensor = _transform(image).to(device)
46
+ input_tensor = input_tensor.unsqueeze(0)
47
+ input_tensor.requires_grad = True
48
+
 
 
 
 
 
 
 
49
  # Get Model Predictions
50
  outputs = model(input_tensor)
51
  probabilities = torch.softmax(outputs, dim=1)[0]
 
55
  sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
56
  show_confidences = OrderedDict(sorted_confidences[:top_k])
57
 
58
+ # Map layer numbers to meaningful parts of the ResNet architecture
59
+ _layers = {
60
+ 1: model.conv1,
61
+ 2: model.layer1[-1],
62
+ 3: model.layer2[-1],
63
+ 4: model.layer3[-1],
64
+ 5: model.layer4[-1],
65
+ 6: model.layer4[-1]
66
+ }
 
 
 
 
67
 
68
+ # Ensure valid layer selection
69
+ target_layer = min(max(target_layer, 1), 6)
70
+ target_layers = [_layers[target_layer]]
71
 
72
+ # Get the class activations from the selected layer
73
+ cam = GradCAM(model=model, target_layers=target_layers)
 
74
 
75
+ # Get the most probable class index
76
+ top_class = max(confidences.items(), key=lambda x: x[1])[0]
77
+ class_idx = classes.index(top_class)
78
+
79
+ # Generate GradCAM for the top predicted class
80
+ grayscale_cam = cam(
81
+ input_tensor=input_tensor,
82
+ targets=[ClassifierOutputTarget(class_idx)],
83
+ aug_smooth=False, # Disable augmentation for memory efficiency
84
+ eigen_smooth=False # Disable eigen smoothing for memory efficiency
85
+ )
86
+ grayscale_cam = grayscale_cam[0, :]
87
 
88
+ # Overlay input image with Class activations
89
+ visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
90
+
91
+ # Clear CUDA cache
92
+ if torch.cuda.is_available():
93
+ torch.cuda.empty_cache()
94
+
95
+ return show_confidences, visualization
96
+
97
+ except Exception as e:
98
+ if torch.cuda.is_available():
99
+ torch.cuda.empty_cache()
100
+ raise e