import gradio as gr
from torchvision import transforms
import albumentations as Al
import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch

from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image

# my files
import utils
import config
import litmodelclass

# gradio page copy
model_stats = """
### YoloV3 Model Implementation & Training Details
Github Link: https://github.com/santule/ERA/tree/main/S13

#### Model Performance:
1. **Validation Loss: 6.05**
2. **Class accuracy: 82.4%**
3. **No obj accuracy: 98.05%**
4. **Obj accuracy: 72.3%**
"""
title = "Yolo3 trained on PASCAL_VOC with GradCAM"
description = "Gradio interface to infer on Yolo3 model, and get GradCAM results"

# colors for the bboxes: one hex colour per PASCAL_VOC class
cmap = plt.get_cmap("tab20b")
class_labels = config.PASCAL_CLASSES
colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
colors_hex = {
    class_labels[i]: matplotlib.colors.rgb2hex(colors[i])
    for i in range(len(class_labels))
}

# example images: each row supplies values for all five inputs
# (image, gradcam on/off, gradcam opacity, IOU threshold, confidence threshold)
examples = [
    ["example_images/009948.jpg", True, 0.5, 0.6, 0.5],
    ["example_images/000041.jpg", True, 0.5, 0.6, 0.5],
    ["example_images/000042.jpg", True, 0.5, 0.6, 0.5],
    ["example_images/000043.jpg", True, 0.5, 0.6, 0.5],
    ["example_images/000044.jpg", True, 0.5, 0.6, 0.5],
    ["example_images/000045.jpg", True, 0.5, 0.6, 0.5],
]

# load the Lightning checkpoint once at start-up instead of on every request
inference_model = litmodelclass.LitYolo.load_from_checkpoint("yolo3_improved_model.ckpt")
inference_model.eval()

# anchors scaled to each prediction grid (13 / 26 / 52)
anchors = (
    torch.tensor(config.ANCHORS)
    * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)


# consolidate the three YOLO heads into a single activation map so grad-cam can work
def yolov3_reshape_transform(x):
    activations = []
    size = x[0].size()[2:4]  # coarsest grid, e.g. (13, 13)
    for x_item in x:
        # (B, 3, S, S, 25) -> (B, 3, 25, S, S)
        x_permute = x_item.permute(0, 1, 4, 2, 3)
        # (B, 3, 25, S, S) -> (B, 75, S, S)
        x_permute = x_permute.reshape(
            (x_permute.shape[0], x_permute.shape[1] * x_permute.shape[2], *x_permute.shape[3:])
        )
        # resample every scale onto the coarsest grid
        activations.append(
            torch.nn.functional.interpolate(torch.abs(x_permute), size, mode="bilinear")
        )
    return torch.cat(activations, axis=1)  # (B, 225, 13, 13)
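
# A minimal sanity check for the reshape transform (a sketch; shapes assume the
# standard YOLOv3 head for PASCAL_VOC, i.e. 3 anchors x (5 + 20 classes) = 75
# channels per scale, concatenated across the 13/26/52 grids into 225 maps):
if __debug__:
    _dummy_heads = [torch.randn(1, 3, s, s, 25) for s in (13, 26, 52)]
    assert yolov3_reshape_transform(_dummy_heads).shape == (1, 225, 13, 13)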

# main inference function of the app
def yolo3_inference(input_img, gradcam=True, gradcam_opa=0.5, user_iou_threshold=0.6, user_threshold=0.5):
    bboxes = [[]]          # batch size is 1
    sections = []          # (box, label) pairs for gr.AnnotatedImage
    nms_boxes_output = []

    # letterbox the input to 416x416 and scale pixels to [0, 1]
    test_transforms = Al.Compose(
        [
            Al.LongestMaxSize(max_size=416),
            Al.PadIfNeeded(min_height=416, min_width=416, border_mode=cv2.BORDER_CONSTANT),
            Al.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        ]
    )
    input_img_copy = test_transforms(image=input_img)["image"]
    input_img_tensor = transforms.ToTensor()(input_img_copy).unsqueeze(0)

    # forward pass; no gradients are needed for plain inference
    with torch.no_grad():
        test_img_out = inference_model(input_img_tensor)

    # decode the three heads into normalised midpoint boxes
    for i in range(3):
        S = test_img_out[i].shape[2]  # grid size: 13 / 26 / 52
        boxes_scale_i = utils.cells_to_bboxes(test_img_out[i], anchors[i], S=S, is_preds=True)
        for idx, box in enumerate(boxes_scale_i):
            bboxes[idx] += box

    # non-max suppression with the user-selected thresholds
    nms_boxes = utils.non_max_suppression(
        bboxes[0],
        iou_threshold=user_iou_threshold,
        threshold=user_threshold,
        box_format="midpoint",
    )
    nms_boxes_output.append(nms_boxes)

    # convert normalised midpoint boxes to pixel corner boxes for the gradio image annotations
    height, width = 416, 416
    for box in nms_boxes:
        class_pred = box[0]
        box = box[2:]
        upper_left_x = int((box[0] - box[2] / 2) * width)
        upper_left_y = max(int((box[1] - box[3] / 2) * height), 0)  # clamp at 0 so the box cannot collapse off-canvas
        lower_right_x = int(upper_left_x + box[2] * width)
        lower_right_y = int(upper_left_y + box[3] * height)
        sections.append(
            ((upper_left_x, upper_left_y, lower_right_x, lower_right_y), class_labels[int(class_pred)])
        )

    if gradcam:
        # EigenCAM is gradient-free and ignores the targets, but they are kept so a
        # gradient-based CAM method can be swapped in without further changes
        objs = [b[0] for b in nms_boxes_output[0]]        # class ids
        bbox_coord = [b[2:] for b in nms_boxes_output[0]]  # normalised midpoint boxes
        targets = [FasterRCNNBoxScoreTarget(objs, bbox_coord)]
        target_layers = [inference_model.model]
        cam = EigenCAM(inference_model, target_layers, use_cuda=False, reshape_transform=yolov3_reshape_transform)
        grayscale_cam = cam(input_tensor=input_img_tensor, targets=targets)[0, :]
        visualization = show_cam_on_image(input_img_copy, grayscale_cam, use_rgb=False, image_weight=gradcam_opa)
        return (visualization, sections)

    return (np.array(input_img_tensor.squeeze(0).permute(1, 2, 0)), sections)


# app GUI
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Yolo3 model trained on PASCAL_VOC dataset Demo!
        20 classes supported - aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow,
        diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor
        """
    )

    with gr.Row():
        img_input = gr.Image()
        img_output = gr.AnnotatedImage().style(color_map=colors_hex)  # gradio 3.x styling API

    with gr.Row():
        gradcam_check = gr.Checkbox(label="Gradcam")
        gradcam_opa = gr.Slider(0, 1, value=0.5, label="Opacity of GradCAM")
        iou_threshold = gr.Slider(0, 1, value=0.6, label="IOU Threshold")
        threshold = gr.Slider(0, 1, value=0.5, label="Confidence Threshold")

    section_btn = gr.Button("Identify Objects")
    section_btn.click(
        yolo3_inference,
        inputs=[img_input, gradcam_check, gradcam_opa, iou_threshold, threshold],
        outputs=[img_output],
    )

    gr.Markdown("## Some Examples")
    gr.Examples(
        examples=examples,
        inputs=[img_input, gradcam_check, gradcam_opa, iou_threshold, threshold],
        outputs=img_output,
        fn=yolo3_inference,
        cache_examples=False,
    )

    with gr.Row():
        with gr.Box():
            gr.Markdown(model_stats)

if __name__ == "__main__":
    demo.launch()
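
# Quick smoke test without the UI (a sketch; assumes the checkpoint and the
# example image used above are present on disk):
#
#   img = cv2.cvtColor(cv2.imread("example_images/000041.jpg"), cv2.COLOR_BGR2RGB)
#   visualization, sections = yolo3_inference(img, gradcam=True)
#   print(sections)  # [((x1, y1, x2, y2), "class_name"), ...]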