Spaces: Sleeping
Commit: "Fix: Inference issue"
Changed file: inference.py (+52 -50)
|
def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """Classify an RGB image and overlay a Grad-CAM heatmap for the top class.

    Args:
        image: Input image as a numpy array of shape (H, W, 3) with values
            in 0-255 (channel order assumed RGB — TODO confirm against caller).
        alpha: Blending weight of the original image in the CAM overlay
            (passed to ``show_cam_on_image`` as ``image_weight``).
        top_k: Number of highest-confidence classes to return.
        target_layer: Integer 1-6 selecting which ResNet layer drives
            Grad-CAM; values outside that range are clamped.
        model: Torchvision-style ResNet classifier; moved to the module-level
            ``device`` and switched to eval mode.
        classes: Sequence of class names indexed by model output position.

    Returns:
        Tuple ``(show_confidences, visualization)``: an ``OrderedDict`` of the
        ``top_k`` class confidences (descending) and the Grad-CAM overlay image.
    """
    model = model.to(device)
    model.eval()

    # Keep an unmodified copy for the final overlay (the transform below
    # normalizes `image`-derived tensors, but the overlay wants raw pixels).
    org_img = image.copy()

    # Per-image channel statistics for normalization, computed on 0-1 values.
    mean_r, mean_g, mean_b = np.mean(image[:, :, 0]/255.), np.mean(image[:, :, 1]/255.), np.mean(image[:, :, 2]/255.)
    std_r, std_g, std_b = np.std(image[:, :, 0]/255.), np.std(image[:, :, 1]/255.), np.std(image[:, :, 2]/255.)

    # Convert img to tensor and normalize it with the per-image statistics.
    _transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((mean_r, mean_g, mean_b), (std_r, std_g, std_b))
    ])

    # Preprocess the input image, move to device, and add a batch dimension.
    input_tensor = _transform(image).to(device)
    input_tensor = input_tensor.unsqueeze(0)

    # For predictions, we don't need gradients.
    with torch.no_grad():
        outputs = model(input_tensor)
        probabilities = torch.softmax(outputs, dim=1)[0]
        # FIX: use len(classes) rather than a hard-coded 1000 (ImageNet-1k),
        # so classifiers with any head size work.
        confidences = {classes[i]: float(probabilities[i]) for i in range(len(classes))}

    # Select the top classes based on user input.
    sorted_confidences = sorted(confidences.items(), key=lambda val: val[1], reverse=True)
    show_confidences = OrderedDict(sorted_confidences[:top_k])

    # Map layer numbers to meaningful parts of the ResNet architecture.
    _layers = {
        1: model.conv1,       # Initial convolution layer
        2: model.layer1[-1],  # Last bottleneck of first residual block
        3: model.layer2[-1],  # Last bottleneck of second residual block
        4: model.layer3[-1],  # Last bottleneck of third residual block
        5: model.layer4[-1],  # Last bottleneck of fourth residual block
        6: model.layer4[-1]   # Changed from fc to last conv layer for better visualization
    }

    # Clamp to the valid 1-6 range before indexing the layer map.
    target_layer = min(max(target_layer, 1), 6)
    target_layers = [_layers[target_layer]]

    # Build the Grad-CAM extractor over the selected layer.
    cam = GradCAM(model=model, target_layers=target_layers)

    # Index of the most probable class (what the CAM will explain).
    top_class = max(confidences.items(), key=lambda x: x[1])[0]
    class_idx = classes.index(top_class)

    # Grad-CAM needs gradients w.r.t. the input, unlike the forward pass above.
    input_tensor.requires_grad = True

    # Generate GradCAM for the top predicted class; smoothing options reduce
    # noise in the activation map.
    grayscale_cam = cam(input_tensor=input_tensor,
                        targets=[ClassifierOutputTarget(class_idx)],
                        aug_smooth=True,
                        eigen_smooth=True)
    grayscale_cam = grayscale_cam[0, :]  # drop the batch dimension

    # Overlay input image (rescaled to 0-1) with the class activations.
    visualization = show_cam_on_image(org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha)
    return show_confidences, visualization