"""Gradio demo: YOLO-v3 object detection (PASCAL VOC) with EigenCAM visualisation."""

import os
import warnings

import albumentations as A
import albumentations.augmentations as AA  # NOTE(review): no longer used directly; kept for compatibility
import cv2
import gradio as gr
import numpy as np
import torch
import torch.optim as optim  # NOTE(review): unused in this chunk; kept in case other code relies on it
from albumentations.pytorch import ToTensorV2
from PIL import Image

from dl_hub.YOLO_V3 import config
from dl_hub.YOLO_V3.PL_model import LitYOLOv3
from dl_hub.YOLO_V3.yolo_v3_utils.PL_data_module import YOLODataModule
from utils import *

warnings.filterwarnings("ignore")

# Deterministic inference-time preprocessing, built once at import time instead of
# on every request: resize to the square network input, scale pixels to [0, 1]
# (zero mean / unit std normalisation), then HWC uint8 -> CHW float tensor.
_INFERENCE_TRANSFORMS = A.Compose(
    [
        # cv2.INTER_CUBIC == 2, the value previously hard-coded as `interpolation=2`.
        A.Resize(config.IMAGE_SIZE, config.IMAGE_SIZE, interpolation=cv2.INTER_CUBIC),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        ToTensorV2(),
    ]
)


def scale_input_image(input_image):
    """Resize an HWC uint8 image to the square network input size.

    Args:
        input_image: numpy array of shape (height, width, channels).

    Returns:
        tuple: (resized image as a numpy array, original ``.shape`` of the input).
    """
    original_shape = input_image.shape
    resized = Image.fromarray(input_image).resize((config.IMAGE_SIZE, config.IMAGE_SIZE))
    return np.array(resized), original_shape


def get_transformed_image(input_image):
    """Preprocess an image into a batched float tensor on ``config.DEVICE``.

    Args:
        input_image: HWC uint8 image array (as delivered by the Gradio component).

    Returns:
        torch.Tensor of shape (1, C, IMAGE_SIZE, IMAGE_SIZE) on ``config.DEVICE``.
    """
    augmented = _INFERENCE_TRANSFORMS(image=input_image)
    # unsqueeze adds the batch dimension the model expects.
    return augmented["image"].unsqueeze(dim=0).to(config.DEVICE)


def get_detection_output(input_image, cam, targets, conf_threshold, iou_threshold,
                         renormalize_cam, image_weight):
    """Run detection and EigenCAM on a single image.

    Args:
        input_image: HWC uint8 image array from the Gradio image component.
        cam: EigenCAM instance wrapping the trained model.
        targets: CAM target list (``[YOLOv3Target()]``).
        conf_threshold: confidence threshold for keeping detections.
        iou_threshold: IOU threshold used during NMS.
        renormalize_cam: if True, renormalize activations inside the bounding boxes.
        image_weight: opacity of the original image in the CAM overlay.

    Returns:
        tuple: (detections drawn on the plain image, detections drawn on the CAM
        overlay), both resized back to the input's original resolution.
    """
    input_tensor = get_transformed_image(input_image)
    boxes_list, classes_list, labels_list, confidences_list = predict_single_image(
        input_tensor, yolo_model_trained, scaled_anchors, config.DEVICE,
        thresh=conf_threshold, iou_thresh=iou_threshold)
    grayscale_cam_input_image = cam(input_tensor, targets=targets)

    input_image_np, original_shape = scale_input_image(input_image)
    input_image_np = input_image_np / 255.
    # numpy shapes are (height, width, channels); PIL.Image.resize takes (width, height).
    original_height, original_width, _ = original_shape

    # CAM overlay with detections drawn on top.
    composite_img = get_eigen_cam_image_overlay(
        boxes_list, input_image_np, grayscale_cam_input_image.squeeze().T,
        renormalize=renormalize_cam, image_weight=image_weight)
    composite_img_w_bb = export_image_with_bounding_box(
        composite_img, boxes_list, labels_list, classes_list, confidences_list)
    composite_img_w_bb_resized = composite_img_w_bb.resize((original_width, original_height))

    # Plain image with detections drawn on top.
    img_cv = cv2.cvtColor(np.uint8(input_image_np * 255), cv2.COLOR_BGR2RGB)
    image_w_bb = export_image_with_bounding_box(
        img_cv, boxes_list, labels_list, classes_list, confidences_list)
    image_w_bb_resized = image_w_bb.resize((original_width, original_height))

    return image_w_bb_resized, composite_img_w_bb_resized


def detect_samples():
    """Run inference on one batch of test images.

    Relies on the module-level model, test loader, CAM object and targets defined
    below; it is only called after those exist.

    Returns:
        tuple: (list of detection images, list of CAM-overlay images), one entry
        per image in the batch.
    """
    sample_detections_list = []
    sample_grad_cam_maps_list = []
    input_tensor, image_float_np, boxes, classes, labels, confidences = predict(
        yolo_model_trained, test_data_loader, scaled_anchors,
        device=config.DEVICE, thresh=0.6, iou_thresh=0.3)
    grayscale_cam = cam(input_tensor, targets=targets)
    for cnt in range(input_tensor.shape[0]):
        # CHW float image back to HWC for drawing.
        image_np = image_float_np[cnt].squeeze().transpose((1, 2, 0))
        composite_img = get_eigen_cam_image_overlay(
            boxes[cnt], image_np, grayscale_cam[cnt].squeeze().T,
            renormalize=False, image_weight=0.7)
        composite_img_w_bb = export_image_with_bounding_box(
            composite_img, boxes[cnt], labels[cnt], classes[cnt], confidences[cnt])
        sample_grad_cam_maps_list.append(composite_img_w_bb)
        # Plain image with detected bounding boxes.
        img_cv = cv2.cvtColor(np.uint8(image_np * 255), cv2.COLOR_BGR2RGB)
        image_w_bb = export_image_with_bounding_box(
            img_cv, boxes[cnt], labels[cnt], classes[cnt], confidences[cnt])
        sample_detections_list.append(image_w_bb)
    return sample_detections_list, sample_grad_cam_maps_list


def yolo_predictor(input_image, conf_threshold=0.6, iou_threshold=0.3,
                   renormalize_cam=False, image_weight=0.7, show_sample_gallery=True):
    """Gradio prediction function: detection + EigenCAM, plus optional sample gallery.

    Returns a 4-element list matching the interface outputs: detection image,
    CAM image, sample-detection gallery, sample-CAM gallery (the galleries are
    empty lists when ``show_sample_gallery`` is False).
    """
    detection_output, grad_cam_output = get_detection_output(
        input_image, cam, targets, conf_threshold, iou_threshold,
        renormalize_cam, image_weight)
    if show_sample_gallery:
        return [detection_output, grad_cam_output,
                sample_detections_list, sample_grad_cam_maps_list]
    return [detection_output, grad_cam_output, [], []]


# ---------------------------------------------------------------------------
# Data module: the same small 25-example split is reused for train and test.
csv_files = [os.path.join(config.DATASET, "25examples.csv"),
             os.path.join(config.DATASET, "25examples.csv")]
train_transforms = config.train_transforms
test_transforms = config.test_transforms
val_transforms = test_transforms
IMAGE_SIZE = config.IMAGE_SIZE
# Grid sizes for the three prediction scales (stride 32, 16 and 8).
S = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]
img_dir = config.IMG_DIR
label_dir = config.LABEL_DIR
anchors = config.ANCHORS
batch_size = 8

yolo_dm = YOLODataModule(csv_files, img_dir, label_dir, anchors, batch_size,
                         image_size=IMAGE_SIZE, S=S, C=20,
                         train_transforms=train_transforms,
                         val_transforms=val_transforms,
                         test_transforms=test_transforms,
                         val_split=0.1,
                         num_workers=config.NUM_WORKERS,
                         use_mosaic_on_train=True,
                         mosaic_probability=0.75,
                         pin_memory=False)
yolo_dm.prepare_data()
yolo_dm.setup()

# Checkpoint to serve.
model_path = "75_mosaic_checkpoint_epoch_39_batch_size_16.ckpt"

# Anchors rescaled to each prediction grid: (3 scales, 3 anchors, (w, h)).
scaled_anchors = (
    torch.tensor(config.ANCHORS)
    * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
).to(config.DEVICE)

train_data_loader = yolo_dm.train_dataloader()
test_data_loader = yolo_dm.test_dataloader()

yolo_model_trained = LitYOLOv3.load_from_checkpoint(model_path, map_location='cpu')

# CAM target layers, one per prediction scale:
#   layers[15] -> highest-resolution scale,
#   layers[22] -> 2nd lowest scale,
#   layers[-1] -> lowest scale.
target_layers = [yolo_model_trained.model.layers[15].pred[0],
                 yolo_model_trained.model.layers[22].pred[0],
                 yolo_model_trained.model.layers[-1].pred[0]]
targets = [YOLOv3Target()]
cam = EigenCAM(yolo_model_trained, target_layers,
               use_cuda=torch.cuda.is_available(), reshape_transform=None)

# Precompute the sample galleries shown alongside each prediction.
sample_detections_list, sample_grad_cam_maps_list = detect_samples()

# ---------------------------------------------------------------------------
# Gradio interface definition.
description = 'This space hosts the Yolo-v3 object detection model, was trained on PASCAL dataset. This space gives the outputs (object detected with bounding box) and the EigenCAM outputs.'
title = 'Object Detection - YOLO-V3 backend'
yolo_examples = [['images/cycle.jpg', 0.6, 0.3, False, 0.8, True],
                 ['images/bus.webp', 0.6, 0.3, False, 0.8, True],
                 ['images/train.jpg', 0.6, 0.3, False, 0.8, True],
                 ['images/motorcycle.jpg', 0.6, 0.3, False, 0.8, True],
                 ['images/boat.jpg', 0.6, 0.3, False, 0.8, True],
                 ['images/car.jpg', 0.6, 0.3, False, 0.8, True]]

demo = gr.Interface(
    yolo_predictor,
    inputs=[gr.Image(),
            gr.Slider(0, 1., 0.6, label="Confidence Threshold"),
            gr.Slider(0, 1., 0.3, label="IOU Threshold"),
            gr.Checkbox(False, label="Renormalize activations to bounding boxes"),
            gr.Slider(0, 1, 0.8, label="Activation Overlay Opacity"),
            gr.Checkbox(True, label="Show Sample Images"),
            ],
    outputs=[gr.Image(label='Detection Output'),
             gr.Image(label='Eigen CAM'),
             gr.Gallery(label='Sample Detections', preview=True,
                        show_label=True, height=300, scale=1),
             gr.Gallery(label='Grad CAM of sample detections', preview=True,
                        show_label=True, height=300, scale=1),
             ],
    examples=yolo_examples,
    title=title,
    description=description,
)
# Start the Gradio server (blocking call); debug mode disabled for deployment.
demo.launch(debug=False)