File size: 8,116 Bytes
6bc5e46
0fd9718
 
 
 
 
 
 
 
 
 
 
 
6bc5e46
0fd9718
 
 
e835c7e
5eb7668
ee9ceac
 
 
8b3c33c
ee9ceac
0fd9718
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00f975e
6bc5e46
 
 
5f7693c
 
 
 
 
 
 
 
 
 
 
8a496e3
 
 
6bc5e46
 
257a05d
 
 
 
 
 
 
c1bc775
dd96198
a42c2a1
dd96198
6bc5e46
 
 
 
 
 
ee9ceac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02723e8
ee9ceac
 
00f975e
49b5807
ee9ceac
 
6bc5e46
 
 
ee9ceac
 
 
7885866
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee9ceac
7885866
6bc5e46
 
 
7885866
6bc5e46
 
 
 
 
a4a6781
6bc5e46
 
 
02723e8
ee9ceac
6bc5e46
 
 
 
 
 
 
 
 
 
 
ee9ceac
 
 
 
 
 
8b3c33c
ee9ceac
 
7885866
ee9ceac
8b3c33c
45249b2
 
8b3c33c
45249b2
 
8b3c33c
45249b2
 
 
ee9ceac
 
 
7885866
6bc5e46
ee9ceac
 
 
 
 
 
 
02723e8
 
ee9ceac
 
02723e8
dd96198
 
 
02723e8
dd96198
 
6bc5e46
5f7693c
 
 
 
 
 
 
 
6bc5e46
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208

import gradio as gr
from torchvision import datasets, transforms
import albumentations as Al
from albumentations.pytorch import ToTensorV2
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from torch.optim.lr_scheduler import OneCycleLR
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.loggers import CSVLogger,TensorBoardLogger
from tqdm import tqdm
import torch
import torch.optim as optim
import matplotlib
import cv2
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image


# my files
import utils
import config
from model import YOLOv3
from utils import (
    mean_average_precision,
    cells_to_bboxes,
    get_evaluation_bboxes,
    save_checkpoint,
    load_checkpoint,
    check_class_accuracy,
    plot_couple_examples,
    accuracy_fn,
    get_loaders
)
from loss import YoloLoss
import litmodelclass


# gradio

# Page title and one-line description of the demo.
title = "Yolo3 trained on PASCAL_VOC with GradCAM"
description = "Gradio interface to infer on Yolo3 model, and get GradCAM results"

# Markdown summary of the trained model (link to the training code plus the
# reported metrics). The leading and trailing empty entries reproduce the
# newline that opens and closes the text.
model_stats = "\n".join(
    [
        "",
        "### YoloV3 Model Implementation & Training Details",
        "Github Link: https://github.com/santule/ERA/tree/main/S13",
        "#### Model Performance:",
        "1. **Validation Loss: 6.05** ",
        "2. **Class accuracy: 82.4%** ",
        "3. **No obj accuracy: 98.05%** ",
        "4. **Obj accuracy: 72.3%**",
        "",
    ]
)

with gr.Blocks() as demo:

    # Page header: demo title plus the list of classes the detector supports.
    gr.Markdown(
        "\n"
        "    # Yolo3 model trained on PASCAL_VOC dataset Demo!\n"
        "    20 Classes supported - aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,diningtable,dog,horse,motorbike,person,pottedplant,sheep,sofa,train,tvmonitor \n"
        "    "
    )

    # example images: one single-column row per sample file
    examples = [
        ["example_images/009948.jpg"],
        ["example_images/000041.jpg"],
        ["example_images/000042.jpg"],
        ["example_images/000043.jpg"],
        ["example_images/000044.jpg"],
        ["example_images/000045.jpg"],
    ]

    # colors for the bboxes: sample the "tab20b" colormap at evenly spaced
    # points, one per class, then map each class name to the hex form of its
    # color.
    class_labels = config.PASCAL_CLASSES
    cmap = plt.get_cmap("tab20b")
    colors = [cmap(position) for position in np.linspace(0, 1, len(class_labels))]
    colors_hex = {
        label: matplotlib.colors.rgb2hex(rgba)
        for label, rgba in zip(class_labels, colors)
    }

    # consolidate the output from the model for gradcam to work
    def yolov3_reshape_transform(x):
        """Merge the per-scale model outputs into a single activation tensor.

        Every element of ``x`` is treated as a 5-D tensor. Its last axis is
        moved to position 2, axes 1 and 2 are folded into one channel axis,
        the absolute value is taken, and the result is bilinearly resized to
        the spatial size read from axes 2 and 3 of ``x[0]``. The resized maps
        are concatenated along the channel axis.

        NOTE(review): the original author's notes describe the inputs as
        (1, 3, S, S, 25) with S = 13/26/52 -- confirm against the model.
        """
        target_size = x[0].size()[2:4]

        def _to_feature_map(scale_output):
            # (N, A, H, W, C) -> (N, A, C, H, W) -> (N, A*C, H, W)
            channels_first = scale_output.permute(0, 1, 4, 2, 3)
            merged = channels_first.flatten(start_dim=1, end_dim=2)
            return torch.nn.functional.interpolate(
                torch.abs(merged), target_size, mode='bilinear'
            )

        return torch.cat([_to_feature_map(item) for item in x], dim=1)
        

    # main function of the app
    def yolo3_inference(input_img, gradcam=True, gradcam_opa=0.5, user_iou_threshold=0.6, user_threshold=0.5):
        """Run YOLOv3 detection on one image and optionally overlay an EigenCAM map.

        Args:
            input_img: Image array handed over by the ``gr.Image`` input
                (presumably an H x W x 3 RGB array -- TODO confirm for the
                gradio version in use).
            gradcam: When true, the returned image is the CAM heat-map blended
                with the pre-processed image; otherwise the pre-processed image
                itself is returned.
            gradcam_opa: Forwarded to ``show_cam_on_image`` as ``image_weight``.
                NOTE(review): that is the weight of the *image* in the blend,
                so larger values make the heat-map fainter -- confirm this is
                the intended meaning of the "Opacity of GradCAM" slider.
            user_iou_threshold: ``iou_threshold`` given to non-max suppression.
            user_threshold: ``threshold`` given to non-max suppression.

        Returns:
            A ``(image, sections)`` tuple. ``sections`` is a list of
            ``((x1, y1, x2, y2), class_name)`` entries with pixel coordinates
            in the 416 x 416 pre-processed image.

        Raises:
            gr.Error: If no image was supplied.
        """
        if input_img is None:
            # Fail with a readable message instead of crashing in the transforms.
            raise gr.Error("Please provide an image before running detection.")

        # load model
        # ``load_from_checkpoint`` is a classmethod, so it is called on the class:
        # building a throwaway instance first is wasted work and is rejected by
        # newer Lightning releases. The weights are mapped to the CPU because
        # the input tensor and the CAM below are CPU-only.
        inference_model = litmodelclass.LitYolo.load_from_checkpoint(
            "yolo3_improved_model.ckpt", map_location="cpu"
        )
        inference_model.eval()

        # anchors scaled by the grid size of each prediction scale
        anchors = (
            torch.tensor(config.ANCHORS)
            * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
        )

        # image transformation: resize the longest side to 416, pad to a
        # 416 x 416 canvas and normalise with max_pixel_value=255
        test_transforms = Al.Compose(
            [
                Al.LongestMaxSize(max_size=416),
                Al.PadIfNeeded(
                    min_height=416, min_width=416, border_mode=cv2.BORDER_CONSTANT
                ),
                Al.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
            ]
        )
        input_img_copy = test_transforms(image=input_img)['image']
        input_img_tensor = transforms.ToTensor()(input_img_copy).unsqueeze(0)

        # infer the image; no gradients are needed to decode the boxes
        with torch.no_grad():
            test_img_out = inference_model(input_img_tensor)

        # process the outputs to create bounding boxes (single-image batch)
        bboxes = []
        for scale_idx in range(3):
            scale_out = test_img_out[scale_idx]
            grid_size = scale_out.shape[2]
            boxes_scale_i = utils.cells_to_bboxes(
                scale_out, anchors[scale_idx], S=grid_size, is_preds=True
            )
            bboxes += boxes_scale_i[0]

        # nms
        nms_boxes = utils.non_max_suppression(
            bboxes,
            iou_threshold=user_iou_threshold,
            threshold=user_threshold,
            box_format="midpoint",
        )

        # use gradio image annotations
        # Boxes are (class, score, x_mid, y_mid, w, h) in relative units. Both
        # corners are derived from the midpoint and clamped to the canvas: a
        # negative coordinate on either axis collapses the drawn box, and
        # deriving the far corner from an already clamped near corner would
        # shift the box.
        height, width = 416, 416
        sections = []
        for box in nms_boxes:
            class_pred = box[0]
            x_mid, y_mid, box_w, box_h = box[2:6]
            upper_left_x = min(max(int((x_mid - box_w / 2) * width), 0), width)
            upper_left_y = min(max(int((y_mid - box_h / 2) * height), 0), height)
            lower_right_x = min(max(int((x_mid + box_w / 2) * width), 0), width)
            lower_right_y = min(max(int((y_mid + box_h / 2) * height), 0), height)
            sections.append(
                (
                    (upper_left_x, upper_left_y, lower_right_x, lower_right_y),
                    class_labels[int(class_pred)],
                )
            )

        if not gradcam:
            return (np.array(input_img_tensor.squeeze(0).permute(1, 2, 0)), sections)

        # for gradcam
        objs = [b[1] for b in nms_boxes]
        bbox_coord = [b[2:] for b in nms_boxes]
        targets = [FasterRCNNBoxScoreTarget(objs, bbox_coord)]

        target_layers = [inference_model.model]
        cam = EigenCAM(
            inference_model,
            target_layers,
            use_cuda=False,
            reshape_transform=yolov3_reshape_transform,
        )
        grayscale_cam = cam(input_tensor=input_img_tensor, targets=targets)
        grayscale_cam = grayscale_cam[0, :]

        # The image comes from the gradio input and is shown by gradio, i.e. it
        # is handled as RGB (presumably -- confirm); ask for an RGB heat-map so
        # its red and blue channels are not swapped in the overlay.
        visualization = show_cam_on_image(
            input_img_copy, grayscale_cam, use_rgb=True, image_weight=gradcam_opa
        )
        return (visualization, sections)

    # app GUI
    # Top row: uploaded image next to the annotated detection result, which is
    # colored per class through ``colors_hex``.
    with gr.Row():
        img_input = gr.Image()
        img_output = gr.AnnotatedImage().style(color_map=colors_hex)

    # Second row: CAM toggle and the three numeric controls.
    with gr.Row():
        gradcam_check = gr.Checkbox(label="Gradcam")
        gradcam_opa = gr.Slider(0, 1, value=0.5, label="Opacity of GradCAM")
        iou_threshold = gr.Slider(0, 1, value=0.6, label="IOU Threshold")
        threshold = gr.Slider(0, 1, value=0.5, label="Threshold")

    # Components passed to ``yolo3_inference``, in its positional argument order.
    _inference_inputs = (img_input, gradcam_check, gradcam_opa, iou_threshold, threshold)

    section_btn = gr.Button("Identify Objects")
    section_btn.click(
        yolo3_inference,
        inputs=list(_inference_inputs),
        outputs=[img_output],
    )

    gr.Markdown("## Some Examples")
    gr.Examples(
        examples=examples,
        inputs=list(_inference_inputs),
        outputs=img_output,
        fn=yolo3_inference,
        cache_examples=False,
    )

    # Model statistics, wrapped in the same Row > Box > Row > Column > Box
    # nesting as before (a multi-item ``with`` enters the contexts left to right).
    with gr.Row(), gr.Box(), gr.Row(), gr.Column(), gr.Box():
        gr.Markdown(model_stats)

# Launch the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()