# app.py — Hugging Face Space "car" (basementparking):
# automobile detection (Mask R-CNN) plus modernity/typicality scoring demo.
from PIL import Image
import sys
import os
import distutils.core
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms, models
import gradio as gr
sys.path.insert(0, os.path.abspath('./detectron2-main'))
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
import os, json, cv2, random
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
import torch
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms, models
import torch.nn as nn
from sklearn.metrics.pairwise import cosine_similarity
# Single device used for every model and tensor in this app (GPU when available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): distutils.core.run_setup only *parses* the vendored detectron2
# setup.py (no build/install happens here) — presumably kept so the Space
# environment registers the package metadata; confirm it is still needed.
dist = distutils.core.run_setup("./detectron2-main/setup.py")
def setup_model():
    """Build a COCO-pretrained Mask R-CNN instance-segmentation predictor.

    Uses the model-zoo R_50_FPN_3x config/weights with a 0.5 score threshold,
    running on GPU when one is available.
    """
    config = get_cfg()
    config.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    config.merge_from_file(
        model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    )
    config.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    config.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )
    return DefaultPredictor(config)
# Build the segmentation predictor once at startup; segment_image() reuses it.
predictor = setup_model()
# Fix RNG seeds for reproducible behaviour across requests.
SEED = 2024
torch.manual_seed(SEED)
np.random.seed(SEED)
# Load the reference "morph" feature vectors, keyed by (BodyType, ModelYear).
# The Features column stores a comma-separated list of floats.
df = pd.read_csv('morphs.csv')
morphs = {}
for _, record in df.iterrows():
    vector = np.array([float(value) for value in record['Features'].split(',')])
    morphs[(record['BodyType'], record['ModelYear'])] = vector
# Define GradCAM class
class GradCAM:
    """Compute Grad-CAM heatmaps for a CNN classifier.

    Registers a forward hook (capturing activations) and a backward hook
    (capturing gradients) on ``target_layer``.  Calling the instance runs a
    forward+backward pass and returns a heatmap normalized to [0, 1] and
    resized to the input's spatial size.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        self.hook_a = self.target_layer.register_forward_hook(self.save_activation)
        # register_full_backward_hook replaces the deprecated
        # register_backward_hook and reports correct gradients for modules
        # composed of several operations (e.g. a ResNet BasicBlock).
        self.hook_g = self.target_layer.register_full_backward_hook(self.save_gradient)

    def save_activation(self, module, input, output):
        self.activations = output

    def save_gradient(self, module, grad_in, grad_out):
        self.gradients = grad_out[0]

    def remove_hooks(self):
        """Detach both hooks; call when this instance is no longer needed so
        hooks do not accumulate on a long-lived model."""
        self.hook_a.remove()
        self.hook_g.remove()

    def __call__(self, input_tensor, class_idx):
        """Return a float32 heatmap in [0, 1] for class ``class_idx``.

        ``input_tensor`` is assumed to be NCHW (batch of 1).
        """
        self.model.zero_grad()
        output = self.model(input_tensor)
        score = output[:, class_idx].squeeze()
        score.backward(retain_graph=True)
        gradient = self.gradients.cpu().data.numpy()[0]
        activation = self.activations.cpu().data.numpy()[0]
        # Channel weights = global-average-pooled gradients (Grad-CAM eq. 1).
        weights = np.mean(gradient, axis=(1, 2))
        cam = np.zeros(activation.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * activation[i, :, :]
        cam = np.maximum(cam, 0)
        # BUG FIX: cv2.resize's dsize is (width, height).  For an NCHW tensor
        # width is size(3) and height is size(2); the original passed (H, W),
        # transposing the target size for non-square inputs.
        cam = cv2.resize(cam, (input_tensor.size(3), input_tensor.size(2)))
        cam -= np.min(cam)
        # Guard against an all-zero map (e.g. all gradients negative) to avoid
        # a division-by-zero producing NaNs.
        cam_max = np.max(cam)
        if cam_max > 0:
            cam /= cam_max
        return cam
def overlay_heatmap_on_image(heatmap, image, alpha=0.4, colormap=cv2.COLORMAP_JET):
    """Blend a [0, 1] heatmap over ``image`` and return the composite.

    The heatmap is colorized with ``colormap`` (cv2.applyColorMap produces BGR
    output); ``alpha`` weights the original image and ``1 - alpha`` the heatmap.
    """
    colored = cv2.applyColorMap(np.uint8(heatmap * 255), colormap)
    return cv2.addWeighted(image, alpha, colored, 1 - alpha, 0)
# Set up the model
# NOTE(review): this is a duplicate of the setup_model() defined earlier in
# this file.  It rebinds the name *after* `predictor` has already been built,
# so this second definition is never actually invoked — consider removing one
# of the two copies.
def setup_model():
    """Create a Mask R-CNN DefaultPredictor from the COCO model-zoo config."""
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set threshold for this model
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # Use GPU if available
    predictor = DefaultPredictor(cfg)
    return predictor
# Function to segment image and score the largest detected car.

# Image preprocessing shared by both scoring networks (ImageNet normalization).
_PREPROCESS = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Lazily-populated cache so each ResNet checkpoint is read from disk once,
# not on every request (the original reloaded both models per call).
_score_models = {}


def _load_score_model(checkpoint_path):
    """Build a 5-output ResNet-18, load ``checkpoint_path``, move to ``device``, eval()."""
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 5)
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model.to(device)
    model.eval()
    return model


def _get_score_models():
    """Return (modernity_model, typicality_model), loading them on first use."""
    if not _score_models:
        _score_models['modernity'] = _load_score_model('modernity.pth')
        _score_models['typicality'] = _load_score_model('typicality.pth')
    return _score_models['modernity'], _score_models['typicality']


def _rank_morph_similarity(feature):
    """Cosine similarity of ``feature`` against every stored morph vector.

    Returns a list of (similarity, (body_type, model_year)) sorted best-first.
    """
    scored = [
        (cosine_similarity(morph.reshape(1, -1), feature.reshape(1, -1))[0][0], key)
        for key, morph in morphs.items()
    ]
    scored.sort(reverse=True, key=lambda item: item[0])
    return scored


def segment_image(image):
    """Detect the largest car in ``image``, crop it, and score it.

    Returns a 7-tuple:
        (status message, cropped-car PIL image, modernity score,
         typicality score, most-similar (BodyType, ModelYear) group,
         modernity Grad-CAM overlay, typicality Grad-CAM overlay)
    When no car is detected the message is returned with six Nones.
    """
    image = np.array(image)
    outputs = predictor(image)
    instances = outputs["instances"]
    pred_classes = instances.pred_classes
    car_class_id = 2  # COCO class id for "car"
    car_indices = [i for i, cls in enumerate(pred_classes) if cls == car_class_id]
    if len(car_indices) == 0:
        return "No automobiles found in the image", None, None, None, None, None, None

    # Keep only the car instance with the largest mask area.
    largest_car_index = max(car_indices, key=lambda i: instances.pred_masks[i].sum().item())
    car_mask = instances.pred_masks[largest_car_index].cpu().numpy()

    # Paste the car onto a white background, then crop to its bounding box.
    white_bg = np.ones_like(image) * 255
    car_region = np.where(car_mask[:, :, None], image, white_bg)
    y_indices, x_indices = np.where(car_mask)
    y_min, y_max = y_indices.min(), y_indices.max()
    x_min, x_max = x_indices.min(), x_indices.max()
    # BUG FIX: +1 so the bottom/right-most mask pixels are kept — numpy slicing
    # excludes the stop index, so the original dropped one row and one column.
    cropped_car = car_region[y_min:y_max + 1, x_min:x_max + 1]
    cropped_car_pil = Image.fromarray(cropped_car.astype('uint8'), 'RGB')

    input_tensor = _PREPROCESS(cropped_car_pil).unsqueeze(0).to(device)
    modernity_model, typicality_model = _get_score_models()

    # Capture the pooled (pre-fc) features of the typicality network.
    activation = {}

    def _save_activation(name):
        def hook(model, input, output):
            activation[name] = output.detach()
        return hook

    hook_handle = typicality_model.avgpool.register_forward_hook(_save_activation('avgpool'))
    with torch.no_grad():
        typicality_output = typicality_model(input_tensor)
    typicality_features = activation['avgpool'].cpu().numpy()
    hook_handle.remove()

    # Typicality = best cosine similarity against the stored morph vectors.
    similarity_scores = _rank_morph_similarity(typicality_features)
    typicality_scores = similarity_scores[0][0]
    most_similar_group = similarity_scores[0][1]
    print(typicality_scores, most_similar_group)

    # Modernity = expected value over the 5 ordered year-category probabilities.
    with torch.no_grad():
        modernity_output = modernity_model(input_tensor)
    year_categories = torch.tensor([0, 1, 2, 3, 4], dtype=torch.float32).to(device)
    probabilities = nn.functional.softmax(modernity_output, dim=1)
    modernity_scores = (probabilities * year_categories).sum(dim=1).item()

    # Grad-CAM overlays for both networks.  Remove the hooks afterwards so
    # they do not accumulate on the (now cached) models across requests.
    modernity_cam = GradCAM(modernity_model, modernity_model.layer4[-1])
    modernity_heatmap = modernity_cam(input_tensor, class_idx=torch.argmax(modernity_output).item())
    modernity_cam.hook_a.remove()
    modernity_cam.hook_g.remove()

    typicality_cam = GradCAM(typicality_model, typicality_model.layer4[-1])
    typicality_heatmap = typicality_cam(input_tensor, class_idx=torch.argmax(typicality_output).item())
    typicality_cam.hook_a.remove()
    typicality_cam.hook_g.remove()

    img_np = cv2.resize(np.array(cropped_car_pil), (224, 224))
    overlayed_img_modernity = overlay_heatmap_on_image(modernity_heatmap, img_np)
    overlayed_img_typicality = overlay_heatmap_on_image(typicality_heatmap, img_np)
    # applyColorMap output is BGR; convert so PIL interprets channels as RGB.
    overlayed_img_modernity_pil = Image.fromarray(cv2.cvtColor(overlayed_img_modernity, cv2.COLOR_BGR2RGB))
    overlayed_img_typicality_pil = Image.fromarray(cv2.cvtColor(overlayed_img_typicality, cv2.COLOR_BGR2RGB))

    return ("Automobiles detected in the image", cropped_car_pil, modernity_scores,
            typicality_scores, most_similar_group,
            overlayed_img_modernity_pil, overlayed_img_typicality_pil)
# Create Gradio interface: one image in, the seven outputs of segment_image() out.
iface = gr.Interface(
    fn=segment_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Output"),
        gr.Image(label="Cropped Car Image"),
        gr.Textbox(label="Modernity Score"),
        gr.Textbox(label="Typicality Score"),
        gr.Textbox(label="most_similar_group"),
        # BUG FIX: the labels were attached to the wrong heatmaps — the 6th
        # return value of segment_image() is the *modernity* (model-year)
        # Grad-CAM and the 7th is the *typicality* (body-type) Grad-CAM, but
        # they were labeled "Type" and "Year" respectively.  Label each image
        # by the score it explains.
        gr.Image(label="Grad-CAM for Modernity"),
        gr.Image(label="Grad-CAM for Typicality")
    ],
    title="Automobile Detection and Scoring using Mask R-CNN",
    description="Upload an image, and the system will detect and segment automobiles, crop the largest car, and predict its modernity and typicality scores. Grad-CAM heatmaps will also be generated."
)
iface.launch()