Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Inference script for ResNet50 trained on ImageNet-1K. | |
| """ | |
| # Standard Library Imports | |
| import numpy as np | |
| import torch | |
| from collections import OrderedDict | |
| # Third Party Imports | |
| import spaces | |
| from torchvision import transforms | |
| from torch.nn import functional as F | |
| from torchvision.models import resnet50 | |
| from pytorch_grad_cam import GradCAM | |
| from pytorch_grad_cam.utils.image import show_cam_on_image | |
| from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget | |
def inference(image, alpha, top_k, target_layer, model=None, classes=None):
    """
    Run inference with GradCAM visualization.

    Args:
        image: Input image. It is copied, divided by 255 and fed to
            ``transforms.ToTensor`` -- presumably an H x W x 3 uint8 RGB
            numpy array (TODO confirm against the caller).
        alpha: Forwarded to ``show_cam_on_image`` as ``image_weight``.
        top_k: Number of highest-probability classes to return. Coerced to
            ``int`` so float slider values can be used for slicing.
        target_layer: Layer selector, clamped to ``1..6``: 1 -> ``conv1``,
            2..5 -> last block of ``layer1``..``layer4``, 6 -> same as 5.
        model: Network exposing ``conv1`` and ``layer1``..``layer4``
            (ResNet-style). Required despite the ``None`` default.
        classes: Sequence of class names indexed by model output index.
            Required despite the ``None`` default.

    Returns:
        Tuple ``(show_confidences, visualization)`` where ``show_confidences``
        is an ``OrderedDict`` mapping class name to probability for the
        ``top_k`` most probable names (descending), and ``visualization`` is
        the result of ``show_cam_on_image``.

    Raises:
        ValueError: If ``model`` or ``classes`` is ``None``.
        Exception: Any failure during inference is printed and re-raised.
    """
    # Fail early with a clear message instead of an AttributeError further down.
    if model is None or classes is None:
        raise ValueError("inference() requires both 'model' and 'classes'")

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    try:
        if use_cuda:
            torch.cuda.empty_cache()

        # Debug: Print model mode
        print(f"Model mode: {model.training}")

        # Ensure model is on correct device and in eval mode
        model = model.to(device)
        model.eval()

        # Mixed precision only makes sense on CUDA; on CPU autocast is disabled
        # explicitly, which avoids the warning torch.cuda.amp.autocast() emits.
        with torch.autocast(device_type=device.type, enabled=use_cuda):
            org_img = image.copy()

            # Convert img to tensor and normalize it
            _transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                ),
            ])

            # Debug: Print image tensor stats
            input_tensor = _transform(image).to(device)
            print(f"Input tensor shape: {input_tensor.shape}")
            print(f"Input tensor range: [{input_tensor.min():.2f}, {input_tensor.max():.2f}]")

            input_tensor = input_tensor.unsqueeze(0)
            input_tensor.requires_grad = True

            # Get Model Predictions
            outputs = model(input_tensor)
            print(f"Raw output shape: {outputs.shape}")
            print(f"Raw output range: [{outputs.min():.2f}, {outputs.max():.2f}]")

            probabilities = torch.softmax(outputs, dim=1)[0]
            print(f"Probabilities sum: {probabilities.sum():.2f}")  # Should be close to 1.0

            # Get top 5 predictions for debugging (fewer if the model has
            # fewer than 5 outputs, where topk(…, 5) would raise).
            debug_k = min(5, probabilities.numel())
            top_probs, top_indices = torch.topk(probabilities, debug_k)
            print("\nTop 5 predictions:")
            ranked = zip(top_probs.tolist(), top_indices.tolist())
            for rank, (prob, idx) in enumerate(ranked, start=1):
                print(f"{rank}. {classes[idx]}: {prob:.4f}")

            # Create confidence dictionary. One tolist() transfer replaces a
            # per-class float(tensor[i]) call (each of which syncs the device).
            confidences = dict(zip(classes, probabilities.detach().tolist()))
            sorted_confidences = sorted(
                confidences.items(), key=lambda item: item[1], reverse=True
            )
            show_confidences = OrderedDict(sorted_confidences[:int(top_k)])

            # Map layer numbers to meaningful parts of the ResNet architecture
            _layers = {
                1: model.conv1,
                2: model.layer1[-1],
                3: model.layer2[-1],
                4: model.layer3[-1],
                5: model.layer4[-1],
                6: model.layer4[-1],
            }
            layer_key = min(max(int(target_layer), 1), 6)
            target_layers = [_layers[layer_key]]

            # Debug: Print selected layer
            print(f"\nUsing target layer: {target_layers[0]}")

            # Take the winning index straight from the tensor. Looking the
            # name up with classes.index() returns the FIRST match, which is
            # the wrong index when two classes share a label.
            class_idx = int(probabilities.argmax())
            top_class = classes[class_idx]
            print(f"\nSelected class for GradCAM: {top_class} (index: {class_idx})")

            # The context manager removes GradCAM's hooks from the model when
            # done, so repeated calls do not accumulate hooks.
            with GradCAM(model=model, target_layers=target_layers) as cam:
                grayscale_cam = cam(
                    input_tensor=input_tensor,
                    targets=[ClassifierOutputTarget(class_idx)],
                    aug_smooth=False,
                    eigen_smooth=False,
                )

            grayscale_cam = grayscale_cam[0, :]
            visualization = show_cam_on_image(
                org_img/255., grayscale_cam, use_rgb=True, image_weight=alpha
            )

        return show_confidences, visualization
    except Exception as e:
        print(f"Error in inference: {str(e)}")
        raise
    finally:
        # Release cached GPU memory on both the success and the error path.
        if use_cuda:
            torch.cuda.empty_cache()