| import itertools |
|
|
| import config as config |
| import cv2 |
| import gradio as gr |
| import matplotlib.patches as patches |
| import matplotlib.pyplot as plt |
| import numpy as np |
| import torch |
| import torchvision |
| import utils |
| from loss import YoloLoss |
| from model import YOLOv3 |
| from PIL import Image |
| from torch.utils.data import DataLoader |
| from torchvision import transforms |
| from utils import get_loaders |
|
|
# Read the checkpoint onto the CPU, then rebuild its mapping with every
# 'model.' fragment removed from the key names. str.replace drops *all*
# occurrences in a key, not only a leading prefix.
# NOTE(review): presumably the checkpoint was saved from a wrapper that held
# the network under a `model` attribute -- confirm against the training code.
state_dict = torch.load('model/Yolov3_Shashank.pth', map_location=torch.device('cpu'))
new_state_dict = {
    name.replace('model.', ''): tensor
    for name, tensor in state_dict.items()
}


# Build the network and load the renamed weights. strict=True makes
# load_state_dict fail on any missing or unexpected key. eval() switches the
# module to inference mode.
model = YOLOv3(in_channels=3, num_classes=config.NUM_CLASSES)
model.load_state_dict(new_state_dict, strict=True)
model.eval()
|
|
# Twenty object-category names. Not referenced elsewhere in the visible code:
# inference() takes its labels from config.PASCAL_CLASSES instead.
classes = (
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)
|
|
|
|
| import grad_cam_func as gcf |
| from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients |
| from pytorch_grad_cam.utils.image import show_cam_on_image |
| from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget |
|
|
|
|
# Number of prediction scales read from the model output and from
# config.SCALED_ANCHORS.
_NUM_SCALES = 3


def _decode_boxes(outputs, anchors, batch_size=1):
    """Turn per-scale model outputs into one box list per image.

    Args:
        outputs: Indexable of per-scale prediction tensors; index 2 of each
            tensor's shape is passed to ``utils.cells_to_bboxes`` as ``S``.
        anchors: One anchor set per scale, in the same order as ``outputs``.
        batch_size: Number of images in the batch.

    Returns:
        A list of ``batch_size`` lists, each holding the boxes of all scales
        for that image.
    """
    per_image = [[] for _ in range(batch_size)]
    for scale_idx, anchor in enumerate(anchors):
        scale_out = outputs[scale_idx]
        grid_size = scale_out.shape[2]
        scale_boxes = utils.cells_to_bboxes(
            scale_out, anchor, S=grid_size, is_preds=True)
        for img_idx, img_boxes in enumerate(scale_boxes):
            per_image[img_idx] += img_boxes
    return per_image


def _render_detections(image, boxes):
    """Plot predicted bounding boxes on the image and return it as RGB pixels.

    Args:
        image: Image in height x width x channel layout (anything
            ``np.array`` accepts).
        boxes: Iterable of 6-element boxes
            ``[class pred, confidence, x, y, width, height]``; the box
            coordinates are fractions of the image size in midpoint format.

    Returns:
        ``uint8`` array of shape (figure height, figure width, 3).
    """
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]

    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    try:
        ax.imshow(im)

        for box in boxes:
            assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
            class_idx = int(box[0])
            x_mid, y_mid, box_w, box_h = box[2], box[3], box[4], box[5]
            # Midpoint format -> upper-left corner, still in relative units.
            upper_left_x = x_mid - box_w / 2
            upper_left_y = y_mid - box_h / 2

            ax.add_patch(patches.Rectangle(
                (upper_left_x * width, upper_left_y * height),
                box_w * width,
                box_h * height,
                linewidth=2,
                edgecolor=colors[class_idx],
                facecolor="none",
            ))
            # Draw through the Axes object instead of pyplot's implicit
            # "current figure", which is shared process-wide.
            ax.text(
                upper_left_x * width,
                upper_left_y * height,
                s=class_labels[class_idx],
                color="white",
                verticalalignment="top",
                bbox={"color": colors[class_idx], "pad": 0},
            )

        ax.axis('off')
        fig.canvas.draw()

        # tostring_rgb() was deprecated in Matplotlib 3.8 and later removed;
        # buffer_rgba() is the supported accessor. Drop the alpha channel and
        # copy so the result does not depend on the canvas staying alive.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        return rgba[..., :3].copy()
    finally:
        # Always free the figure, even when drawing fails.
        plt.close(fig)


def _gradcam_overlay(input_img, transform_img, gc_trans):
    """Compute the GradCAM heat map and blend it over the input image.

    Args:
        input_img: Original image array as received by ``inference``.
        transform_img: Batched model input built from ``input_img``.
        gc_trans: Forwarded to ``show_cam_on_image`` as ``image_weight``.

    Returns:
        The blended image returned by ``show_cam_on_image``.
    """
    target_layer = [model.layers[-2]]
    # NOTE(review): if gcf.BaseCAM registers its own hooks on `model`, they
    # may need releasing as well -- confirm in grad_cam_func.
    cam = gcf.BaseCAM(model, target_layer)

    hooked_model = ActivationsAndGradients(model, target_layer, None)
    try:
        outputs = hooked_model(transform_img)

        anchors = [config.SCALED_ANCHORS[i] for i in range(_NUM_SCALES)]
        bboxes = _decode_boxes(outputs, anchors, batch_size=1)

        # NOTE: fixed thresholds here; the iou_threshold / conf_threshold
        # arguments of inference() do not affect the GradCAM targets.
        nms_boxes = utils.non_max_suppression(
            bboxes[0], iou_threshold=0.5, threshold=0.4, box_format="midpoint",
        )

        # NOTE(review): box[0] is passed through unchanged (not cast to int).
        targets = [ClassifierOutputTarget(box[0]) for box in nms_boxes]

        per_layer_cams = cam.compute_cam_per_layer(transform_img, targets, False)
        heatmap = cam.aggregate_multi_layers(per_layer_cams)[0, :, :]
    finally:
        # ActivationsAndGradients attaches forward hooks to the shared global
        # model; without release() every request would leave more hooks behind.
        hooked_model.release()

    # 416x416 is hard-coded; presumably the network input size -- TODO confirm
    # against config.
    img = cv2.resize(input_img, (416, 416))
    img = np.float32(img) / 255
    return show_cam_on_image(img, heatmap, use_rgb=True, image_weight=gc_trans)


def inference(input_img=None, iou_threshold=0.6, conf_threshold=0.5, gc_trans=0.3):
    """Run YOLOv3 on one image and produce a detection plot and a GradCAM overlay.

    Args:
        input_img: Image to analyse (as delivered by the Gradio image input),
            or ``None`` when no image was supplied.
        iou_threshold: IoU threshold for non-max suppression of the plotted
            boxes.
        conf_threshold: Confidence threshold for non-max suppression of the
            plotted boxes.
        gc_trans: Weight of the original image in the GradCAM blend.

    Returns:
        ``(detection_image, gradcam_image)``; ``(None, None)`` when
        ``input_img`` is ``None``.
    """
    if input_img is None:
        return None, None

    transform_img = config.infer_transforms(image=input_img)['image'].unsqueeze(0)
    transform_img_visual = config.infer_transforms_visualization(image=input_img)['image']

    # Detection pass: no gradients are needed for plotting boxes.
    with torch.no_grad():
        outputs = model(transform_img)
        anchors = [np.array(config.SCALED_ANCHORS[i]) for i in range(_NUM_SCALES)]
        bboxes = _decode_boxes(outputs, anchors, batch_size=transform_img.shape[0])

    nms_boxes = utils.non_max_suppression(
        bboxes[0], iou_threshold=iou_threshold,
        threshold=conf_threshold, box_format="midpoint",
    )

    # Channel-first -> channel-last for plotting.
    outputs_inference_bb = _render_detections(
        transform_img_visual.permute(1, 2, 0), nms_boxes)

    # GradCAM pass: a second forward pass with hooks attached.
    outputs_inference_gc = _gradcam_overlay(input_img, transform_img, gc_trans)

    return outputs_inference_bb, outputs_inference_gc
|
|
|
|
|
|
# Static text shown at the top of the Gradio page.
title = "PASCAL VOC trained on Yolov3"
description = "A simple Gradio interface to infer on Yolov3 model, and get GradCAM results"

# Ten example rows; each supplies a value for the four inputs, in order:
# image path, IOU threshold, confidence threshold, GradCAM transparency.
examples = [[f'examples/test_{i}.jpg', 0.6, 0.5, 0.3] for i in range(10)]

# Input widgets, positionally matching the parameters of inference().
input_components = [
    gr.Image(label="Input image"),
    gr.Slider(0, 1, value=0.6, label="IOU Threshold"),
    gr.Slider(0, 1, value=0.4, label="Threshold"),
    gr.Slider(0, 1, value=0.5, label="GradCAM Transparency"),
]

# Output widgets, positionally matching the two values inference() returns.
output_components = [
    gr.Image(label="Yolov3 Prediction"),
    gr.Image(label="GradCAM Output"),
]

demo = gr.Interface(
    inference,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    examples=examples,
)
# Starts the web server as soon as this module is executed.
demo.launch()