import os

import torch

### Installations ###
#####################
# NOTE(review): installing a dependency at import time is fragile; kept as-is
# because the deployment environment (e.g. a hosted demo space) appears to
# rely on it being installed on startup.
os.system('pip install git+https://github.com/facebookresearch/detectron2.git')

### Import Libraries ###
#########################
# general
import gradio as gr
import numpy as np
import cv2
from PIL import Image

# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog

# import torchvision utilities
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F

### Detectron Model ###
#######################
# Mask R-CNN (ResNet-50 FPN, 3x schedule) pretrained on COCO, forced onto CPU.
cfg = get_cfg()
cfg.MODEL.DEVICE = 'cpu'
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # keep only detections scoring >= 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)

# COCO class names, indexed by the model's predicted class ids.
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
class_catalog = metadata.thing_classes

### ResNet18 Model ###
######################
# ImageNet-pretrained ResNet-18 with the classifier head replaced by a 5-way
# linear layer, then loaded with fine-tuned "modernity" weights from disk.
pretrained_model = models.resnet18(pretrained=True)
IN_FEATURES = pretrained_model.fc.in_features
OUTPUT_DIM = 5
pretrained_model.fc = torch.nn.Linear(IN_FEATURES, OUTPUT_DIM)
pretrained_model.load_state_dict(torch.load('model_modernity_advanced.pt', map_location='cpu'))
pretrained_model.eval()  # inference only — disable dropout/batchnorm updates

### Test Transforms ###
#######################
# Standard ImageNet preprocessing for the ResNet-18 classifier.
pretrained_size = 224
pretrained_means = [0.485, 0.456, 0.406]
pretrained_stds = [0.229, 0.224, 0.225]

test_transforms = transforms.Compose([
    transforms.Resize(pretrained_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=pretrained_means, std=pretrained_stds),
])


### Car Modernity Function ###
##############################
def modernity_pred(logits):
    """Collapse class logits into a scalar modernity score.

    Softmaxes the logits and returns the expected class index
    (sum over i of p_i * i), i.e. a soft-argmax over the ordinal
    modernity classes.

    Parameters
    ----------
    logits : torch.Tensor
        Shape (batch, num_classes) raw model outputs.

    Returns
    -------
    torch.Tensor
        Shape (batch,) expected-class scores.

    Note: generalized to any number of classes; the original hard-coded
    the weight vector to [[0, 1, 2, 3, 4]] (5 classes), which this
    reproduces exactly for the 5-way head used here.
    """
    p = F.softmax(logits, dim=1)
    groups = torch.arange(logits.shape[1]).unsqueeze(0)
    return (p * groups).sum(dim=1)


### Image Classification function ###
#####################################
def image_classifier(inp):
    """Detect the largest car in ``inp``, isolate it on a white background,
    and score its modernity with the fine-tuned ResNet-18.

    Parameters
    ----------
    inp : np.ndarray
        H x W x 3 uint8 image from the Gradio "image" input.
        NOTE(review): Gradio supplies RGB while DefaultPredictor's default
        input format is BGR — detection still works in practice, but confirm
        cfg.INPUT.FORMAT if detection accuracy matters.

    Returns
    -------
    (PIL.Image.Image, str)
        The composited car crop and a modernity-score message, or a white
        placeholder image plus an explanation when no car is found.
    """
    ### Detect in full image ###
    ############################
    output = predictor(inp)
    instances = output['instances']

    # map predicted class ids to COCO class names
    classes = [class_catalog[i] for i in instances.pred_classes.detach().cpu()]
    is_car = np.array(classes) == 'car'

    # guard clause: nothing to score if no car was detected
    if not is_car.any():
        message = 'no car was detected in image'
        # White image as place holder
        placeholder = Image.fromarray(
            np.ones(shape=(100, 150, 3), dtype=np.uint8) * 255, mode='RGB')
        return placeholder, message

    # select cars and pick largest according to pixel count of pred_mask
    pred_masks = instances.pred_masks[is_car].detach().cpu()
    idx_largest_car = int(
        pred_masks.reshape(pred_masks.shape[0], -1).sum(axis=1).argmax())

    ### crop image by according region of interest ###
    ##################################################
    # bounding box (x_min, y_min, x_max, y_max) of the largest car
    pred_boxes = instances.pred_boxes[is_car][idx_largest_car]
    box = list(pred_boxes)[0].detach().cpu().numpy()
    x_min = int(box[0])
    y_min = int(box[1])
    x_max = int(box[2])
    y_max = int(box[3])

    # crop image respectively
    crop_img = inp[y_min:y_max, x_min:x_max, :]

    ### Change Background to White ###
    ##################################
    # convert crop to PIL format
    cropped = Image.fromarray(crop_img.astype('uint8'), 'RGB')

    # binary instance mask -> 8-bit PIL image, cropped to the same box
    pred_mask_crop = Image.fromarray(
        (pred_masks[idx_largest_car].numpy() * 255).astype('uint8'))
    pred_mask_crop = pred_mask_crop.crop((x_min, y_min, x_max, y_max))

    # composite the car crop over a white canvas, using the mask as alpha
    s = np.array(pred_mask_crop).shape
    background = Image.fromarray(
        np.ones(shape=(s[0], s[1], 3), dtype=np.uint8) * 255, mode='RGB')
    new_alpha_mask = Image.new('L', background.size, color=0)
    new_alpha_mask.paste(pred_mask_crop)
    composite = Image.composite(cropped, background, new_alpha_mask)

    ### Predict modernity ###
    img_trans = test_transforms(composite).unsqueeze(0)
    with torch.no_grad():  # inference only — no gradients needed
        out = pretrained_model(img_trans)
    mod_score = modernity_pred(out)

    return composite, f'Modernity score: {round(float(mod_score), 5)}'


### Gradio App ###
##################
title = "Prediction of Car Modernity Score"
description = "Upload image of car to get prediction of the modernity score. If image includes multiple cars, car with largest pixel count is extracted"
examples = [['test_img_1.jpg'], ['test_img_2.jpg'], ['test_img_3.jpg'],
            ['test_img_4.jpg'], ['test_img_5.jpg'], ['test_img_6.jpeg'],
            ['test_img_7.jpeg']]

classif_app = gr.Interface(fn=image_classifier,
                           inputs="image",
                           outputs=["image", "label"],
                           title=title,
                           description=description,
                           examples=examples)
classif_app.launch()