"""Gradio app: YOLOv3 object detection on PASCAL VOC with EigenCAM visualization."""

import os
import warnings

import albumentations as A
import cv2
import gradio as gr
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from PIL import Image

from dl_hub.YOLO_V3 import config
from dl_hub.YOLO_V3.PL_model import LitYOLOv3
from dl_hub.YOLO_V3.yolo_v3_utils.PL_data_module import YOLODataModule
from utils import *  # predict, predict_single_image, EigenCAM, YOLOv3Target, image/CAM overlay helpers

warnings.filterwarnings("ignore")

def scale_input_image(input_image):
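    """Resize the uploaded image to config.IMAGE_SIZE x config.IMAGE_SIZE and
    return it together with the original (H, W, C) shape so the outputs can be
    scaled back to the input resolution."""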
    original_shape = input_image.shape
    im = Image.fromarray(input_image)
    newsize = (config.IMAGE_SIZE, config.IMAGE_SIZE)
    im_resized = im.resize(newsize)
    input_image_resized = np.array(im_resized)
    return input_image_resized, original_shape

def get_transformed_image(input_image):
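    """Apply the test-time transforms (resize, normalize, to-tensor) and return
    a batched tensor on config.DEVICE."""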
    test_transforms = A.Compose(
        [
            A.Resize(config.IMAGE_SIZE, config.IMAGE_SIZE, interpolation=cv2.INTER_CUBIC, p=1.0),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
            ToTensorV2(),
        ]
    )

    augmentations = test_transforms(image=input_image)
    input_tensor = augmentations["image"]
    input_tensor = input_tensor.unsqueeze(dim=0)
    input_tensor = input_tensor.to(config.DEVICE)
    return input_tensor


def get_detection_output(input_image, cam, targets, conf_threshold, iou_threshold, renormalize_cam, image_weight):
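    """Run detection and EigenCAM on a single uploaded image.

    Returns two PIL images resized back to the input resolution: the detections
    drawn on the original image, and the detections drawn on the EigenCAM overlay.
    """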
    input_tensor = get_transformed_image(input_image)
    boxes_list, classes_list, labels_list, confidences_list = predict_single_image(input_tensor, 
                                                                                   yolo_model_trained, 
                                                                                   scaled_anchors, 
                                                                                   config.DEVICE, 
                                                                                   thresh=conf_threshold, 
                                                                                   iou_thresh=iou_threshold)
    # EigenCAM activation map for the transformed input
    grayscale_cam_input_image = cam(input_tensor, targets=targets)

    # Rescale the raw input to the model resolution and normalize to [0, 1]
    input_image_np, original_shape = scale_input_image(input_image)
    input_image_np = input_image_np / 255.0
    original_height, original_width, _ = original_shape
    
    # get the composite image
    composite_img = get_eigen_cam_image_overlay(boxes_list,
                                                input_image_np, 
                                                grayscale_cam_input_image.squeeze().T, 
                                                renormalize=renormalize_cam,
                                                image_weight=image_weight)
        
    # export image with bounding boxes 
    composite_img_w_bb = export_image_with_bounding_box(composite_img,
                                                        boxes_list, 
                                                        labels_list, 
                                                        classes_list,
                                                        confidences_list)
    composite_img_w_bb_resized = composite_img_w_bb.resize((original_width, original_height))

    # Draw the detected bounding boxes on the (resized) original image
    img_cv = cv2.cvtColor(np.uint8(input_image_np * 255), cv2.COLOR_BGR2RGB)
    image_w_bb = export_image_with_bounding_box(img_cv,
                                                boxes_list,
                                                labels_list,
                                                classes_list,
                                                confidences_list)
    image_w_bb_resized = image_w_bb.resize((original_width, original_height))
    
    return image_w_bb_resized, composite_img_w_bb_resized


def detect_samples():
    """
    Run inference on one batch of test images
    """
    sample_detections_list = []
    sample_grad_cam_maps_list = []
    input_tensor, image_float_np, boxes, classes, labels, confidences = predict(yolo_model_trained,
                                                                                test_data_loader,
                                                                                scaled_anchors,
                                                                                device = config.DEVICE,
                                                                                thresh=0.6,
                                                                                iou_thresh=0.3)
    grayscale_cam = cam(input_tensor, targets=targets)
    for cnt in range(input_tensor.shape[0]):
        # get the composite image
        composite_img = get_eigen_cam_image_overlay(boxes[cnt],
                                                    image_float_np[cnt].squeeze().transpose((1,2,0)), 
                                                    grayscale_cam[cnt].squeeze().T, 
                                                    renormalize=False,
                                                    image_weight=0.7)
        
        # export image with bounding boxes 
        composite_img_w_bb = export_image_with_bounding_box(composite_img,
                                                            boxes[cnt], 
                                                            labels[cnt], 
                                                            classes[cnt],
                                                            confidences[cnt])
        sample_grad_cam_maps_list.append(composite_img_w_bb)
        
        # convert original image with detected bounding boxes
        img_cv = cv2.cvtColor(np.uint8(image_float_np[cnt].squeeze().transpose((1,2,0))*255), cv2.COLOR_BGR2RGB)
        image_w_bb = export_image_with_bounding_box(img_cv, 
                                                    boxes[cnt],
                                                    labels[cnt],
                                                    classes[cnt],
                                                    confidences[cnt])
        sample_detections_list.append(image_w_bb)
    return sample_detections_list, sample_grad_cam_maps_list


def yolo_predictor(input_image,
                   conf_threshold=0.6,
                   iou_threshold=0.3,
                   renormalize_cam=False,
                   image_weight=0.7,
                   show_sample_gallery=True):
    """
    Perform prediction on the uploaded image and return the detection output,
    the EigenCAM output, and (optionally) the precomputed sample galleries.
    """

    # Get the detection output and the EigenCAM overlay
    detection_output, grad_cam_output = get_detection_output(
        input_image, cam, targets, conf_threshold, iou_threshold, renormalize_cam, image_weight
    )

    if show_sample_gallery:
        return [detection_output, grad_cam_output, sample_detections_list, sample_grad_cam_maps_list]
    else:
        return [detection_output, grad_cam_output, [], []]


# Define the data module (the same 25-example CSV is used for the train and test splits)
csv_files = [os.path.join(config.DATASET, "25examples.csv"), os.path.join(config.DATASET, "25examples.csv")]
train_transforms = config.train_transforms
test_transforms = config.test_transforms
val_transforms = test_transforms
IMAGE_SIZE = config.IMAGE_SIZE
S = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]  # grid sizes for the three prediction scales
img_dir = config.IMG_DIR
label_dir = config.LABEL_DIR
anchors = config.ANCHORS
batch_size = 8
yolo_dm = YOLODataModule(
    csv_files,
    img_dir,
    label_dir,
    anchors,
    batch_size,
    image_size=IMAGE_SIZE,
    S=S,
    C=20,
    train_transforms=train_transforms,
    val_transforms=val_transforms,
    test_transforms=test_transforms,
    val_split=0.1,
    num_workers=config.NUM_WORKERS,
    use_mosaic_on_train=True,
    mosaic_probability=0.75,
    pin_memory=False)
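# Build the datasets and dataloaders; the test loader is used below to populate
# the sample-detection galleries shown in the interface.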
yolo_dm.prepare_data()
yolo_dm.setup()

# Trained checkpoint (epoch 39, batch size 16, 75% mosaic augmentation), expected next to this script
model_path = "75_mosaic_checkpoint_epoch_39_batch_size_16.ckpt"

# Scale the anchors to the grid size of each prediction scale
scaled_anchors = (
    torch.tensor(config.ANCHORS)
    * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
).to(config.DEVICE)
train_data_loader = yolo_dm.train_dataloader()
test_data_loader = yolo_dm.test_dataloader()

# Define Model
yolo_model_trained = LitYOLOv3.load_from_checkpoint(model_path, map_location='cpu')

# EigenCAM target layers, one per YOLOv3 prediction scale:
#   highest scale : yolo_model_trained.model.layers[15].pred[0]
#   middle scale  : yolo_model_trained.model.layers[22].pred[0]
#   lowest scale  : yolo_model_trained.model.layers[-1].pred[0]
target_layers = [yolo_model_trained.model.layers[15].pred[0],
                 yolo_model_trained.model.layers[22].pred[0],
                 yolo_model_trained.model.layers[-1].pred[0]]

targets = [YOLOv3Target()]
cam = EigenCAM(yolo_model_trained,
               target_layers,
               use_cuda=torch.cuda.is_available(),
               reshape_transform=None)
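
# Precompute detections and EigenCAM maps for one batch of sample images;
# these lists are reused for the galleries on every request.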
sample_detections_list, sample_grad_cam_maps_list = detect_samples()

# Define the Gradio interface
description = ('This Space hosts a YOLOv3 object detection model trained on the PASCAL VOC dataset. '
               'It returns the detection output (objects with bounding boxes) and the corresponding EigenCAM output.')
title = 'Object Detection - YOLO-V3 backend'

yolo_examples = [['images/cycle.jpg', 0.6, 0.3, False, 0.8, True],
                ['images/bus.webp', 0.6, 0.3, False, 0.8, True],
                ['images/train.jpg', 0.6, 0.3, False, 0.8, True], 
                ['images/motorcycle.jpg', 0.6, 0.3, False, 0.8, True],  
                ['images/boat.jpg', 0.6, 0.3, False, 0.8, True],
                ['images/car.jpg', 0.6, 0.3, False, 0.8, True]]

demo = gr.Interface(yolo_predictor,
                    inputs=[gr.Image(),
                            gr.Slider(0, 1.0, 0.6, label="Confidence Threshold"),
                            gr.Slider(0, 1.0, 0.3, label="IOU Threshold"),
                            gr.Checkbox(False, label="Renormalize activations to bounding boxes"),
                            gr.Slider(0, 1.0, 0.8, label="Activation Overlay Opacity"),
                            gr.Checkbox(True, label="Show Sample Images"),
                           ],
                    outputs=[gr.Image(label='Detection Output'),
                             gr.Image(label='Eigen CAM'),
                             gr.Gallery(label='Sample Detections', preview=True, show_label=True, height=300, scale=1),
                             gr.Gallery(label='Grad CAM of sample detections', preview=True, show_label=True, height=300, scale=1),
                            ],
                    examples=yolo_examples,
                    title=title,
                    description=description)
demo.launch(debug=False)