Spaces:
Sleeping
Sleeping
| from PIL import Image | |
| import sys | |
| import os | |
| import distutils.core | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| from torchvision import transforms, models | |
| import gradio as gr | |
| sys.path.insert(0, os.path.abspath('./detectron2-main')) | |
| import detectron2 | |
| from detectron2.utils.logger import setup_logger | |
| setup_logger() | |
| from detectron2 import model_zoo | |
| from detectron2.engine import DefaultPredictor | |
| from detectron2.config import get_cfg | |
| import os, json, cv2, random | |
| from detectron2.utils.visualizer import Visualizer | |
| from detectron2.data import MetadataCatalog, DatasetCatalog | |
| import torch | |
| from detectron2.engine import DefaultPredictor | |
| from detectron2.config import get_cfg | |
| from detectron2 import model_zoo | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| from torchvision import transforms, models | |
| import torch.nn as nn | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# Torch device shared by the classifier models and their input tensors below.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Executes detectron2's setup.py at import time. The returned object is not
# referenced again in this file.
# NOTE(review): distutils was removed from the standard library in
# Python 3.12 - confirm the runtime version this app is deployed on.
dist = distutils.core.run_setup("./detectron2-main/setup.py")
def setup_model(
    config_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
    score_thresh=0.5,
):
    """Build a detectron2 ``DefaultPredictor`` from a model-zoo config.

    Args:
        config_name: Model-zoo config path, used for both the config file
            and the matching checkpoint URL. Defaults to the Mask R-CNN
            R50-FPN 3x instance-segmentation config.
        score_thresh: Value written to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

    Returns:
        A ``DefaultPredictor`` running on CUDA when available, else CPU.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_name))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
    # Assigned after the merge so a config file can never override the
    # device that was detected at runtime.
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    return DefaultPredictor(cfg)
# Build the segmentation predictor once, at import time.
predictor = setup_model()

# Seed both RNGs with the same fixed value.
SEED = 2024
np.random.seed(SEED)
torch.manual_seed(SEED)

# Morph table: maps (BodyType, ModelYear) to the feature vector parsed from
# the comma-separated 'Features' column of morphs.csv.
df = pd.read_csv('morphs.csv')
morphs = {}
for _, record in df.iterrows():
    group_key = (record['BodyType'], record['ModelYear'])
    morphs[group_key] = np.array(
        [float(token) for token in record['Features'].split(',')]
    )
# Define GradCAM class
class GradCAM:
    """Grad-CAM heatmap generator for a single layer of a model.

    A forward hook records the target layer's output and a backward hook
    records the gradient flowing into that output. Calling the instance
    runs one forward/backward pass and combines the two into a heatmap.

    The hook handles are kept in ``hook_a`` (forward) and ``hook_g``
    (backward); call :meth:`remove_hooks` when the instance is no longer
    needed so the hooks do not stay attached to the layer.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        self.hook_a = self.target_layer.register_forward_hook(self.save_activation)
        # register_backward_hook is deprecated; prefer the full variant and
        # fall back only on torch versions that do not provide it.
        register_backward = getattr(
            self.target_layer, "register_full_backward_hook", None
        ) or self.target_layer.register_backward_hook
        self.hook_g = register_backward(self.save_gradient)

    def save_activation(self, module, input, output):
        """Forward hook: remember the target layer's output."""
        # Detached so the stored tensor does not keep the autograd graph alive.
        self.activations = output.detach()

    def save_gradient(self, module, grad_in, grad_out):
        """Backward hook: remember the gradient w.r.t. the layer's output."""
        self.gradients = grad_out[0].detach()

    def remove_hooks(self):
        """Detach both hooks from the target layer."""
        self.hook_a.remove()
        self.hook_g.remove()

    def __call__(self, input_tensor, class_idx):
        """Compute the heatmap of ``class_idx`` for the first batch sample.

        Args:
            input_tensor: Model input; dimensions 2 and 3 are used as the
                output height and width.
            class_idx: Column of the model output to back-propagate from.

        Returns:
            A float32 array of shape (height, width), shifted to start at 0
            and scaled to peak at 1. An all-zero map is returned unchanged
            instead of being divided by zero.

        Raises:
            RuntimeError: If the hooks captured nothing during the pass.
        """
        self.model.zero_grad()
        output = self.model(input_tensor)
        # .sum() equals the original scalar for a batch of one and keeps
        # backward() valid for larger batches.
        score = output[:, class_idx].sum()
        score.backward()
        if self.gradients is None or self.activations is None:
            raise RuntimeError("GradCAM hooks did not capture the target layer")

        gradient = self.gradients.cpu().numpy()[0]
        activation = self.activations.cpu().numpy()[0]
        # Channel weights are the spatially averaged gradients.
        weights = gradient.mean(axis=(1, 2))
        # Weighted sum over channels, replacing the per-channel Python loop.
        cam = np.tensordot(weights, activation, axes=1).astype(np.float32)
        cam = np.maximum(cam, 0)
        # cv2.resize expects (width, height); tensor dims 2 and 3 are (H, W).
        height, width = int(input_tensor.shape[2]), int(input_tensor.shape[3])
        cam = cv2.resize(cam, (width, height))
        cam -= cam.min()
        peak = cam.max()
        if peak > 0:
            cam /= peak
        return cam
def overlay_heatmap_on_image(heatmap, image, alpha=0.4, colormap=cv2.COLORMAP_JET):
    """Blend a colour-mapped heatmap onto an image.

    Args:
        heatmap: 2-D array of intensities, nominally in [0, 1]. NaNs are
            treated as 0 and out-of-range values are clipped, so the uint8
            conversion cannot wrap around.
        image: uint8 image array with 3 channels. ``cv2.applyColorMap``
            produces BGR output, so pass a BGR image for consistent colours.
        alpha: Weight of ``image`` in the blend; the heatmap gets
            ``1 - alpha``.
        colormap: OpenCV colormap constant.

    Returns:
        The blended uint8 image, same size as ``image``.
    """
    heatmap = np.nan_to_num(np.asarray(heatmap, dtype=np.float32))
    heatmap = np.clip(heatmap, 0.0, 1.0)
    # addWeighted requires equal sizes; cv2.resize takes (width, height).
    if heatmap.shape[:2] != image.shape[:2]:
        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, colormap)
    overlayed_img = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)
    return overlayed_img
# Set up the model
# NOTE: this rebinds the name `setup_model`, which is also defined earlier in
# this file; the module-level `predictor` was created by the earlier one.
def setup_model():
    """Return a Mask R-CNN ``DefaultPredictor`` built from the model zoo.

    Uses the COCO instance-segmentation R50-FPN 3x config and checkpoint,
    a test score threshold of 0.5, and CUDA when it is available.
    """
    zoo_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    use_gpu = torch.cuda.is_available()

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(zoo_config))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # detection score threshold
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
    cfg.MODEL.DEVICE = "cuda" if use_gpu else "cpu"
    return DefaultPredictor(cfg)
# Fine-tuned classifiers keyed by checkpoint path, so weights are read from
# disk once per process instead of on every request.
_CLASSIFIER_CACHE = {}

# Preprocessing applied to the cropped car before classification.
_PREPROCESS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


def _load_classifier(checkpoint_path, num_classes=5):
    """Return a cached, eval-mode ResNet-18 loaded from ``checkpoint_path``."""
    model = _CLASSIFIER_CACHE.get(checkpoint_path)
    if model is None:
        # No pretrained download: the strict load_state_dict below
        # overwrites every parameter and buffer anyway.
        model = models.resnet18()
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
        model.to(device)
        model.eval()
        _CLASSIFIER_CACHE[checkpoint_path] = model
    return model


def _crop_largest_car(rgb_image, instances, car_class_id=2):
    """Cut the largest detected car out onto white; None if there is none."""
    car_indices = [
        i for i, cls in enumerate(instances.pred_classes.tolist())
        if cls == car_class_id
    ]
    if not car_indices:
        return None
    largest = max(car_indices, key=lambda i: instances.pred_masks[i].sum().item())
    car_mask = instances.pred_masks[largest].cpu().numpy().astype(bool)
    ys, xs = np.where(car_mask)
    if ys.size == 0:
        # A detection with an empty mask has nothing to crop.
        return None
    white_bg = np.full_like(rgb_image, 255)
    car_region = np.where(car_mask[:, :, None], rgb_image, white_bg)
    # +1 because slice ends are exclusive; otherwise the last row and column
    # of the car are dropped and a 1-pixel-wide mask yields an empty crop.
    cropped = car_region[ys.min():ys.max() + 1, xs.min():xs.max() + 1]
    return Image.fromarray(np.ascontiguousarray(cropped, dtype=np.uint8))


def _forward_with_avgpool(model, input_tensor):
    """Run ``model`` without gradients; return (logits, avgpool output)."""
    captured = {}

    def hook(module, inputs, output):
        captured['avgpool'] = output.detach()

    handle = model.avgpool.register_forward_hook(hook)
    try:
        with torch.no_grad():
            logits = model(input_tensor)
    finally:
        handle.remove()
    return logits, captured['avgpool'].cpu().numpy()


def _grad_cam_overlay(model, input_tensor, class_idx, image_bgr):
    """Return a PIL RGB image of the Grad-CAM heatmap blended on ``image_bgr``."""
    cam = GradCAM(model, model.layer4[-1])
    try:
        heatmap = cam(input_tensor, class_idx=class_idx)
    finally:
        # The models are cached, so hooks must not pile up across requests.
        cam.hook_a.remove()
        cam.hook_g.remove()
    heatmap = np.nan_to_num(heatmap)
    blended_bgr = overlay_heatmap_on_image(heatmap, image_bgr)
    return Image.fromarray(cv2.cvtColor(blended_bgr, cv2.COLOR_BGR2RGB))


# Function to segment image
def segment_image(image):
    """Detect the largest car in ``image`` and score it.

    Args:
        image: A PIL image (as supplied by the Gradio input) or an RGB
            array; ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple ``(status, cropped_car, modernity_score, typicality_score,
        most_similar_group, modernity_overlay, typicality_overlay)``. All
        entries after ``status`` are ``None`` when no image was given or no
        car was found.
    """
    if image is None:
        return "No image provided", None, None, None, None, None, None
    if isinstance(image, Image.Image):
        # Drops alpha / expands grayscale so the array is always H x W x 3.
        image = image.convert("RGB")
    image = np.array(image)

    # DefaultPredictor expects BGR input; the PIL-derived array is RGB.
    outputs = predictor(np.ascontiguousarray(image[:, :, ::-1]))
    instances = outputs["instances"]

    cropped_car_pil = _crop_largest_car(image, instances)
    if cropped_car_pil is None:
        return "No automobiles found in the image", None, None, None, None, None, None

    input_tensor = _PREPROCESS(cropped_car_pil).unsqueeze(0).to(device)
    modernity_model = _load_classifier('modernity.pth')
    typicality_model = _load_classifier('typicality.pth')

    # Typicality: highest cosine similarity between the avgpool features and
    # any stored morph vector.
    typicality_output, typicality_features = _forward_with_avgpool(
        typicality_model, input_tensor
    )
    feature_row = typicality_features.reshape(1, -1)
    scored_groups = [
        (cosine_similarity(morph.reshape(1, -1), feature_row)[0][0], key)
        for key, morph in morphs.items()
    ]
    best_similarity, most_similar_group = max(scored_groups, key=lambda pair: pair[0])
    typicality_scores = float(best_similarity)
    print(typicality_scores, most_similar_group)

    # Modernity: expected category index under the softmax distribution.
    with torch.no_grad():
        modernity_output = modernity_model(input_tensor)
    year_categories = torch.arange(
        modernity_output.shape[1], dtype=torch.float32, device=device
    )
    probabilities = nn.functional.softmax(modernity_output, dim=1)
    modernity_scores = (probabilities * year_categories).sum(dim=1).item()

    # The colour-mapped heatmap is BGR, so blend it with a BGR copy of the
    # crop; the helper converts the result back to RGB.
    img_np = cv2.resize(np.array(cropped_car_pil), (224, 224))
    img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    overlayed_img_modernity_pil = _grad_cam_overlay(
        modernity_model, input_tensor,
        torch.argmax(modernity_output).item(), img_bgr,
    )
    overlayed_img_typicality_pil = _grad_cam_overlay(
        typicality_model, input_tensor,
        torch.argmax(typicality_output).item(), img_bgr,
    )

    return (
        "Automobiles detected in the image",
        cropped_car_pil,
        modernity_scores,
        typicality_scores,
        most_similar_group,
        overlayed_img_modernity_pil,
        overlayed_img_typicality_pil,
    )
# Create Gradio interface
# The outputs list is positional: it must follow the order of the tuple
# returned by segment_image (status, crop, modernity score, typicality score,
# most similar group, modernity Grad-CAM, typicality Grad-CAM).
iface = gr.Interface(
    fn=segment_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Output"),
        gr.Image(label="Cropped Car Image"),
        gr.Textbox(label="Modernity Score"),
        gr.Textbox(label="Typicality Score"),
        gr.Textbox(label="most_similar_group"),
        # The modernity (year) heatmap is returned before the typicality
        # (type) heatmap; these two labels were previously swapped.
        gr.Image(label="Grad-CAM for Year"),
        gr.Image(label="Grad-CAM for Type"),
    ],
    title="Automobile Detection and Scoring using Mask R-CNN",
    description="Upload an image, and the system will detect and segment automobiles, crop the largest car, and predict its modernity and typicality scores. Grad-CAM heatmaps will also be generated."
)
iface.launch()