"""Car-design scoring demo.

Detects and segments the largest car in an uploaded image with a
COCO-pretrained Mask R-CNN (detectron2), scores the crop with two
fine-tuned ResNet-18 heads ("modernity" and "typicality"), compares the
typicality embedding against precomputed morph feature vectors, and
renders Grad-CAM heatmaps for both heads.  Served through Gradio.
"""

import distutils.core  # NOTE(review): distutils is removed in Python 3.12 -- confirm runtime < 3.12
import os
import sys
from functools import lru_cache

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
from torchvision import models, transforms

# Make the bundled detectron2 checkout importable before importing it.
sys.path.insert(0, os.path.abspath('./detectron2-main'))
import detectron2  # noqa: E402,F401
from detectron2 import model_zoo  # noqa: E402
from detectron2.config import get_cfg  # noqa: E402
from detectron2.engine import DefaultPredictor  # noqa: E402
from detectron2.utils.logger import setup_logger  # noqa: E402

setup_logger()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Run the bundled detectron2 setup.py (Colab-style source install).
dist = distutils.core.run_setup("./detectron2-main/setup.py")


def setup_model():
    """Build a COCO Mask R-CNN instance-segmentation predictor."""
    cfg = get_cfg()
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # detection confidence cut-off
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    return DefaultPredictor(cfg)


predictor = setup_model()

SEED = 2024
torch.manual_seed(SEED)
np.random.seed(SEED)


def _load_morphs(path='morphs.csv'):
    """Load per-(BodyType, ModelYear) morph feature vectors from CSV.

    The 'Features' column holds a comma-separated list of floats.
    """
    frame = pd.read_csv(path)
    table = {}
    for _, row in frame.iterrows():
        features = np.array([float(v) for v in row['Features'].split(',')])
        table[(row['BodyType'], row['ModelYear'])] = features
    return table


morphs = _load_morphs()


class GradCAM:
    """Grad-CAM saliency maps for a CNN classifier.

    Registers forward/backward hooks on ``target_layer``; call the
    instance with an input batch and a class index to get a [0, 1]
    heatmap resized to the input's spatial size.  Call :meth:`remove`
    when done so the hooks do not accumulate on the shared model.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        self.hook_a = self.target_layer.register_forward_hook(self.save_activation)
        # NOTE(review): register_backward_hook is deprecated in recent
        # torch; register_full_backward_hook can reject the in-place
        # ReLU inside ResNet blocks, so the legacy API is kept here.
        self.hook_g = self.target_layer.register_backward_hook(self.save_gradient)

    def save_activation(self, module, input, output):
        self.activations = output

    def save_gradient(self, module, grad_in, grad_out):
        self.gradients = grad_out[0]

    def remove(self):
        """Detach both hooks from the target layer."""
        self.hook_a.remove()
        self.hook_g.remove()

    def __call__(self, input_tensor, class_idx):
        self.model.zero_grad()
        output = self.model(input_tensor)
        score = output[:, class_idx].squeeze()
        score.backward(retain_graph=True)
        gradient = self.gradients.cpu().data.numpy()[0]
        activation = self.activations.cpu().data.numpy()[0]
        # Channel weights = global-average-pooled gradients.
        weights = np.mean(gradient, axis=(1, 2))
        cam = np.zeros(activation.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * activation[i, :, :]
        cam = np.maximum(cam, 0)  # keep only positive class influence
        # BUGFIX: cv2.resize takes dsize=(width, height); for an NCHW
        # tensor size(2) is H and size(3) is W, so the original call
        # swapped them (harmless only for square inputs).
        cam = cv2.resize(cam, (input_tensor.size(3), input_tensor.size(2)))
        cam -= np.min(cam)
        peak = np.max(cam)
        if peak > 0:  # BUGFIX: avoid 0/0 -> all-NaN heatmap when cam is flat
            cam /= peak
        return cam


def overlay_heatmap_on_image(heatmap, image, alpha=0.4, colormap=cv2.COLORMAP_JET):
    """Blend a [0, 1] heatmap over an HxWx3 uint8 image (image weight = alpha)."""
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, colormap)
    return cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)


def _build_head(checkpoint_path):
    """ResNet-18 with a 5-way head, loaded from ``checkpoint_path``, eval mode."""
    net = models.resnet18(pretrained=True)  # weights fully overwritten below
    net.fc = nn.Linear(net.fc.in_features, 5)
    net.load_state_dict(torch.load(checkpoint_path, map_location=device))
    net.to(device)
    net.eval()
    return net


@lru_cache(maxsize=1)
def _scoring_models():
    """Load both scoring heads once and reuse them across requests.

    The original code re-loaded both checkpoints from disk on every
    request; this keeps the lazy first-call load but caches the result.
    """
    return _build_head('modernity.pth'), _build_head('typicality.pth')


def _calculate_similarity_with_all_groups(new_feature):
    """Cosine similarity of ``new_feature`` vs. every stored morph, best first."""
    similarities = [
        (cosine_similarity(morph.reshape(1, -1), new_feature.reshape(1, -1))[0][0], key)
        for key, morph in morphs.items()
    ]
    similarities.sort(reverse=True, key=lambda x: x[0])
    return similarities


def _calculate_modernity_scores(outputs, year_categories):
    """Softmax-expected year bucket: sum_i p_i * category_i."""
    probabilities = nn.functional.softmax(outputs, dim=1)
    return (probabilities * year_categories).sum(dim=1)


# ImageNet preprocessing for both ResNet-18 heads (hoisted: loop-invariant).
_PREPROCESS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


def segment_image(image):
    """Full pipeline: segment the largest car, score it, build Grad-CAM overlays.

    Parameters
    ----------
    image : PIL.Image
        Uploaded image (RGB, per the Gradio input spec).

    Returns
    -------
    tuple
        (status, cropped car, modernity score, typicality score,
        most similar (BodyType, ModelYear) group, modernity Grad-CAM,
        typicality Grad-CAM); everything after the status is None when
        no car is detected.
    """
    image = np.array(image)
    outputs = predictor(image)
    instances = outputs["instances"]
    pred_classes = instances.pred_classes
    car_class_id = 2  # COCO class id for car
    car_indices = [i for i, x in enumerate(pred_classes) if x == car_class_id]
    if len(car_indices) == 0:
        return "No automobiles found in the image", None, None, None, None, None, None

    # Largest car instance by mask area.
    largest = max(car_indices, key=lambda i: instances.pred_masks[i].sum().item())
    car_mask = instances.pred_masks[largest].cpu().numpy()

    # Paste the car onto a white background and crop to its bounding box.
    white_bg = np.ones_like(image) * 255
    car_region = np.where(car_mask[:, :, None], image, white_bg)
    y_indices, x_indices = np.where(car_mask)
    y_min, y_max = y_indices.min(), y_indices.max()
    x_min, x_max = x_indices.min(), x_indices.max()
    # BUGFIX: +1 so the max row/column is included (numpy slices exclude stop).
    cropped_car = car_region[y_min:y_max + 1, x_min:x_max + 1]
    cropped_car_pil = Image.fromarray(cropped_car.astype('uint8'), 'RGB')

    input_tensor = _PREPROCESS(cropped_car_pil).unsqueeze(0).to(device)
    modernity_model, typicality_model = _scoring_models()

    # Capture the typicality embedding at the global-average-pool layer.
    activation = {}

    def save_avgpool(model, inputs, output):
        activation['avgpool'] = output.detach()

    hook_handle = typicality_model.avgpool.register_forward_hook(save_avgpool)
    with torch.no_grad():
        typicality_output = typicality_model(input_tensor)
        typicality_features = activation['avgpool'].cpu().numpy()
    hook_handle.remove()

    similarity_scores = _calculate_similarity_with_all_groups(typicality_features)
    typicality_scores = similarity_scores[0][0]
    most_similar_group = similarity_scores[0][1]
    print(typicality_scores, most_similar_group)

    with torch.no_grad():
        modernity_output = modernity_model(input_tensor)
    year_categories = torch.tensor([0, 1, 2, 3, 4], dtype=torch.float32).to(device)
    modernity_scores = _calculate_modernity_scores(modernity_output, year_categories).item()

    # Grad-CAM for both heads; BUGFIX: remove the hooks afterwards so
    # they don't accumulate on the cached models across requests.
    modernity_cam = GradCAM(modernity_model, modernity_model.layer4[-1])
    modernity_heatmap = modernity_cam(
        input_tensor, class_idx=torch.argmax(modernity_output).item())
    modernity_cam.remove()

    typicality_cam = GradCAM(typicality_model, typicality_model.layer4[-1])
    typicality_heatmap = typicality_cam(
        input_tensor, class_idx=torch.argmax(typicality_output).item())
    typicality_cam.remove()

    # Overlay heatmaps on the (resized) crop; convert back to RGB PIL.
    img_np = cv2.resize(np.array(cropped_car_pil), (224, 224))
    overlayed_img_modernity = overlay_heatmap_on_image(modernity_heatmap, img_np)
    overlayed_img_typicality = overlay_heatmap_on_image(typicality_heatmap, img_np)
    overlayed_img_modernity_pil = Image.fromarray(
        cv2.cvtColor(overlayed_img_modernity, cv2.COLOR_BGR2RGB))
    overlayed_img_typicality_pil = Image.fromarray(
        cv2.cvtColor(overlayed_img_typicality, cv2.COLOR_BGR2RGB))

    return ("Automobiles detected in the image", cropped_car_pil, modernity_scores,
            typicality_scores, most_similar_group,
            overlayed_img_modernity_pil, overlayed_img_typicality_pil)


# Gradio interface wiring (labels/strings kept byte-identical).
iface = gr.Interface(
    fn=segment_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Output"),
        gr.Image(label="Cropped Car Image"),
        gr.Textbox(label="Modernity Score"),
        gr.Textbox(label="Typicality Score"),
        gr.Textbox(label="most_similar_group"),
        gr.Image(label="Grad-CAM for Type"),
        gr.Image(label="Grad-CAM for Year"),
    ],
    title="Automobile Detection and Scoring using Mask R-CNN",
    description="Upload an image, and the system will detect and segment automobiles, crop the largest car, and predict its modernity and typicality scores. Grad-CAM heatmaps will also be generated.",
)

iface.launch()