# rohithb's picture
# Update app.py
# 66ccd43
import torch
import gradio as gr
from dl_hub.YOLO_V3 import config
import torch
import torch.optim as optim
import os
from dl_hub.YOLO_V3.PL_model import LitYOLOv3
from dl_hub.YOLO_V3.yolo_v3_utils.PL_data_module import YOLODataModule
from utils import *
import warnings
import albumentations as A
import albumentations.augmentations as AA
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
from PIL import Image
warnings.filterwarnings("ignore")
def scale_input_image(input_image):
    """
    Resize an image array to the square size given by ``config.IMAGE_SIZE``.

    Args:
        input_image: Image as a NumPy array that ``PIL.Image.fromarray``
            accepts.

    Returns:
        A ``(resized_array, original_shape)`` tuple, where ``resized_array``
        is the image resized to ``(config.IMAGE_SIZE, config.IMAGE_SIZE)``
        and converted back to a NumPy array, and ``original_shape`` is the
        ``.shape`` of the array that was passed in.
    """
    # Remember the incoming shape so callers can scale results back later.
    shape_before = input_image.shape
    side = config.IMAGE_SIZE
    # No resampling filter is passed, so PIL's default filter is used.
    resized = Image.fromarray(input_image).resize((side, side))
    return np.array(resized), shape_before
def get_transformed_image(input_image):
    """
    Turn a raw image array into a batched tensor on ``config.DEVICE``.

    The image is resized to ``config.IMAGE_SIZE`` x ``config.IMAGE_SIZE``,
    normalized, converted to a tensor, given a leading batch dimension and
    moved to ``config.DEVICE``.

    Args:
        input_image: Image as a NumPy array, passed to the albumentations
            pipeline as ``image=``.

    Returns:
        The transformed image tensor with a batch dimension of size 1.
    """
    side = config.IMAGE_SIZE
    pipeline = A.Compose(
        [
            # Direct resize to a square; aspect ratio is not preserved.
            # interpolation=2 is OpenCV's INTER_CUBIC flag value.
            AA.geometric.resize.Resize(side, side, interpolation=2, always_apply=True, p=1),
            # mean 0 / std 1 with max_pixel_value=255: per the albumentations
            # Normalize formula this only rescales pixel values by 1/255.
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
            ToTensorV2(),
        ]
    )
    image_tensor = pipeline(image=input_image)["image"]
    # Add the batch axis first, then move to the configured device.
    batched = image_tensor.unsqueeze(dim=0)
    return batched.to(config.DEVICE)
def get_detection_output(input_image, cam, targets, conf_threshold, iou_threshold, renormalize_cam, image_weight):
    """
    Run detection and CAM on a single image and render both results.

    Relies on the module-level ``yolo_model_trained`` and ``scaled_anchors``.

    Args:
        input_image: Image as a NumPy array (height, width, channels).
        cam: Callable invoked as ``cam(input_tensor, targets=targets)`` to
            produce the activation map.
        targets: Targets forwarded to ``cam``.
        conf_threshold: Confidence threshold forwarded to
            ``predict_single_image`` as ``thresh``.
        iou_threshold: IOU threshold forwarded to ``predict_single_image``
            as ``iou_thresh``.
        renormalize_cam: Forwarded to ``get_eigen_cam_image_overlay`` as
            ``renormalize``.
        image_weight: Forwarded to ``get_eigen_cam_image_overlay`` as
            ``image_weight``.

    Returns:
        A ``(image_w_bb_resized, composite_img_w_bb_resized)`` tuple: the
        input image with bounding boxes, and the CAM overlay with bounding
        boxes, both resized back to the input image's width and height.
    """
    input_tensor = get_transformed_image(input_image)
    boxes_list, classes_list, labels_list, confidences_list = predict_single_image(
        input_tensor,
        yolo_model_trained,
        scaled_anchors,
        config.DEVICE,
        thresh=conf_threshold,
        iou_thresh=iou_threshold,
    )
    print(boxes_list)
    print(classes_list)
    print(labels_list)
    print(confidences_list)

    grayscale_cam_input_image = cam(input_tensor, targets=targets)

    input_image_np, original_shape = scale_input_image(input_image)
    input_image_np = input_image_np / 255.

    # A NumPy image shape is (height, width[, channels]); slicing the first
    # two entries names them correctly and also tolerates a 2-D shape.
    original_height, original_width = original_shape[:2]
    # The .resize calls below take the size as (width, height).
    output_size = (original_width, original_height)

    # get the composite image
    # NOTE(review): the CAM map is transposed before overlay; presumably this
    # matches the layout get_eigen_cam_image_overlay expects - confirm.
    composite_img = get_eigen_cam_image_overlay(
        boxes_list,
        input_image_np,
        grayscale_cam_input_image.squeeze().T,
        renormalize=renormalize_cam,
        image_weight=image_weight,
    )
    # export image with bounding boxes
    composite_img_w_bb = export_image_with_bounding_box(
        composite_img,
        boxes_list,
        labels_list,
        classes_list,
        confidences_list,
    )
    composite_img_w_bb_resized = composite_img_w_bb.resize(output_size)

    # detection with bounding boxes
    # convert original image with detected bounding boxes
    print(input_image_np.shape)
    # Back to 8-bit, then swap the first and third channels.
    # NOTE(review): whether the input is really BGR depends on the caller.
    img_cv = cv2.cvtColor(np.uint8(input_image_np * 255), cv2.COLOR_BGR2RGB)
    image_w_bb = export_image_with_bounding_box(
        img_cv,
        boxes_list,
        labels_list,
        classes_list,
        confidences_list,
    )
    image_w_bb_resized = image_w_bb.resize(output_size)
    return image_w_bb_resized, composite_img_w_bb_resized
def detect_samples():
    """
    Produce detection and CAM overlay images for sample test images.

    Calls ``predict`` on the module-level ``test_data_loader`` with fixed
    thresholds (``thresh=0.6``, ``iou_thresh=0.3``), runs the module-level
    ``cam`` on the returned input tensor, and renders one image pair per
    entry along the tensor's first dimension.

    Returns:
        A ``(sample_detections_list, sample_grad_cam_maps_list)`` tuple:
        images with bounding boxes, and CAM overlays with bounding boxes.
    """
    input_tensor, image_float_np, boxes, classes, labels, confidences = predict(
        yolo_model_trained,
        test_data_loader,
        scaled_anchors,
        device=config.DEVICE,
        thresh=0.6,
        iou_thresh=0.3,
    )
    grayscale_cam = cam(input_tensor, targets=targets)

    detections = []
    cam_overlays = []
    num_samples = input_tensor.shape[0]
    for idx in range(num_samples):
        # Move axis 0 to the end (a view, no copy); presumably
        # channel-first -> channel-last, used for overlay and plain detection.
        image_hwc = image_float_np[idx].squeeze().transpose((1, 2, 0))

        # CAM overlay with bounding boxes.
        overlay = get_eigen_cam_image_overlay(
            boxes[idx],
            image_hwc,
            grayscale_cam[idx].squeeze().T,
            renormalize=False,
            image_weight=0.7,
        )
        overlay_with_boxes = export_image_with_bounding_box(
            overlay,
            boxes[idx],
            labels[idx],
            classes[idx],
            confidences[idx],
        )
        cam_overlays.append(overlay_with_boxes)

        # Plain image (8-bit, first/third channels swapped) with boxes.
        image_8bit = cv2.cvtColor(np.uint8(image_hwc * 255), cv2.COLOR_BGR2RGB)
        image_with_boxes = export_image_with_bounding_box(
            image_8bit,
            boxes[idx],
            labels[idx],
            classes[idx],
            confidences[idx],
        )
        detections.append(image_with_boxes)

    return detections, cam_overlays
def yolo_predictor(input_image,
                   conf_threshold=0.6,
                   iou_threshold=0.3,
                   renormalize_cam=False,
                   image_weight=0.7,
                   show_sample_gallery = True):
    """
    Run detection on one image and return the outputs for the interface.

    Uses the module-level ``cam`` and ``targets``, and, when the gallery is
    requested, the module-level ``sample_detections_list`` and
    ``sample_grad_cam_maps_list``.

    Args:
        input_image: Image to run detection on.
        conf_threshold: Confidence threshold for detections.
        iou_threshold: IOU threshold for detections.
        renormalize_cam: Passed through to ``get_detection_output``.
        image_weight: Passed through to ``get_detection_output``.
        show_sample_gallery: When true, include the sample galleries in the
            result; otherwise return empty galleries.

    Returns:
        A four-item list: detection image, CAM image, sample detections
        gallery, sample CAM gallery.
    """
    detection_output, grad_cam_output = get_detection_output(
        input_image,
        cam,
        targets,
        conf_threshold,
        iou_threshold,
        renormalize_cam,
        image_weight,
    )
    # Gallery hidden: return empty lists in its two slots.
    if not show_sample_gallery:
        return [detection_output, grad_cam_output, [], []]
    return [detection_output, grad_cam_output, sample_detections_list, sample_grad_cam_maps_list]
# Define data module
# The same CSV file is listed twice in csv_files.
csv_files = [os.path.join(config.DATASET, "25examples.csv"),os.path.join(config.DATASET,"25examples.csv")]
train_transforms=config.train_transforms
test_transforms=config.test_transforms
# Validation reuses the test-time transforms.
val_transforms = test_transforms
IMAGE_SIZE = config.IMAGE_SIZE
S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]
img_dir=config.IMG_DIR
label_dir=config.LABEL_DIR
anchors=config.ANCHORS
batch_size = 8
yolo_dm = YOLODataModule(
    csv_files,
    img_dir,
    label_dir,
    anchors,
    batch_size,
    image_size=IMAGE_SIZE,
    S=S,
    C=20,
    train_transforms = train_transforms,
    val_transforms = val_transforms,
    test_transforms = test_transforms,
    val_split=0.1,
    num_workers = config.NUM_WORKERS,
    use_mosaic_on_train=True,
    mosaic_probability=0.75,
    pin_memory = False)
yolo_dm.prepare_data()
yolo_dm.setup()

# Model path
model_path = "75_mosaic_checkpoint_epoch_39_batch_size_16.ckpt"

# Define model parameters
# Each entry of config.S is expanded to a (3, 2) block and multiplied into
# config.ANCHORS element-wise.
# NOTE(review): assumes config.ANCHORS has shape (len(config.S), 3, 2) - confirm.
scaled_anchors = (
    torch.tensor(config.ANCHORS)
    * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
).to(config.DEVICE)
train_data_loader = yolo_dm.train_dataloader()
test_data_loader = yolo_dm.test_dataloader()

# Define Model
yolo_model_trained = LitYOLOv3.load_from_checkpoint(model_path, map_location='cpu')
# Input tensors and anchors are placed on config.DEVICE, so place the model
# there too, and switch it to evaluation mode explicitly instead of depending
# on downstream helpers to do either.
yolo_model_trained.to(config.DEVICE)
yolo_model_trained.eval()

# get detection on samples
# Target layer for lowest scale prediction: yolo_model_trained.model.layers[-1].pred[0]
# Target layer for 2nd lowest scale prediction: yolo_model_trained.model.layers[22].pred[0]
# Target layer for highest scale prediction: yolo_model_trained.model.layers[15].pred[0]
target_layers = [yolo_model_trained.model.layers[15].pred[0],
                 yolo_model_trained.model.layers[22].pred[0],
                 yolo_model_trained.model.layers[-1].pred[0]]
targets = [YOLOv3Target()]
cam = EigenCAM(yolo_model_trained,
               target_layers,
               use_cuda=torch.cuda.is_available(),
               reshape_transform=None)
# Computed once at start-up; yolo_predictor returns these lists for the
# sample galleries.
sample_detections_list, sample_grad_cam_maps_list = detect_samples()
# Define Interface
description = 'This space hosts the Yolo-v3 object detection model, was trained on PASCAL dataset. This space gives the outputs (object detected with bounding box) and the EigenCAM outputs.'
title = 'Object Detection - YOLO-V3 backend'

# Every example row uses the same settings, in the order of the interface
# inputs: image, confidence threshold, IOU threshold, renormalize flag,
# overlay opacity, show-gallery flag.
yolo_examples = [
    [example_path, 0.6, 0.3, False, 0.8, True]
    for example_path in (
        'images/cycle.jpg',
        'images/bus.webp',
        'images/train.jpg',
        'images/motorcycle.jpg',
        'images/boat.jpg',
        'images/car.jpg',
    )
]

demo = gr.Interface(
    fn=yolo_predictor,
    # Input order matches the positional parameters of yolo_predictor.
    inputs=[
        gr.Image(),
        gr.Slider(minimum=0, maximum=1., value=0.6, label="Confidence Threshold"),
        gr.Slider(minimum=0, maximum=1., value=0.3, label="IOU Threshold"),
        gr.Checkbox(value=False, label="Renormalize activations to bounding boxes"),
        gr.Slider(minimum=0, maximum=1, value=0.8, label="Activation Overlay Opacity"),
        gr.Checkbox(value=True, label="Show Sample Images"),
    ],
    # Output order matches the four-item list returned by yolo_predictor.
    outputs=[
        gr.Image(label='Detection Output'),
        gr.Image(label='Eigen CAM'),
        gr.Gallery(label='Sample Detections', preview=True, show_label=True, height=300, scale=1),
        gr.Gallery(label='Grad CAM of sample detections', preview=True, show_label=True, height=300, scale=1),
    ],
    examples=yolo_examples,
    title=title,
    description=description,
)
demo.launch(debug=False)