File size: 8,198 Bytes
60dd03b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import torch
import torchvision
from dl_hub.YOLO_V3.yolo_v3_utils.utils import non_max_suppression, plot_image
from pytorch_grad_cam import AblationCAM, EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import numpy as np
from dl_hub.YOLO_V3 import config
from dl_hub.YOLO_V3.yolo_v3_utils.utils import (
    cells_to_bboxes,
)


# One color per class for box/label drawing; currently every class is drawn
# in the same fixed color (255, 0, 0).  The commented alternative below would
# instead assign a random color to each class.
#COLORS = np.random.uniform(0, 255, size=(len(config.PASCAL_CLASSES), 3))
COLORS = np.tile(np.array([[255.,0.,0.]]),(len(config.PASCAL_CLASSES),1))



class YOLOv3Target:
    """Placeholder target type passed to the CAM library purely so that
    EigenCAM's API does not raise; it carries no state or behavior."""


def get_box_coords(box, im):
    """
    Map a normalized midpoint-format box onto pixel coordinates of ``im``.

    Args:
        box: sequence [center_x, center_y, width, height], each a fraction
             of the corresponding image dimension.
        im: H x W x C numpy image supplying the pixel dimensions.

    Returns:
        [x1, y1, x2, y2] integer pixel corners (upper-left, lower-right).
    """
    height, width, _ = im.shape
    center_x, center_y, box_w, box_h = box[0], box[1], box[2], box[3]
    x1 = int((center_x - box_w / 2) * width)
    y1 = int((center_y - box_h / 2) * height)
    return [x1, y1, x1 + int(box_w * width), y1 + int(box_h * height)]


def draw_boxes(boxes, labels, classes, confidences, image):
    """
    Draw bounding boxes and "class(confidence)" captions on the image.

    Args:
        boxes: list of [x1, y1, x2, y2] pixel-coordinate boxes.
        labels: list of integer class indices, parallel to ``boxes``.
        classes: list of class-name strings, parallel to ``boxes``.
        confidences: list of confidence scores, parallel to ``boxes``.
        image: H x W x C numpy image, drawn on in place.

    Returns:
        The same image array with the annotations drawn.
    """
    # numpy images are (height, width, channels).  The previous unpacking
    # (``w, h, _ = image.shape``) swapped the two, clamping x-coordinates
    # against the height and y-coordinates against the width, which broke
    # on non-square images.
    h, w, _ = image.shape
    for i, box in enumerate(boxes):
        color = tuple(COLORS[labels[i]])
        # Clamp the rectangle 10 px inside the frame so the border and
        # caption remain visible even for boxes touching the image edge.
        cv2.rectangle(
            image,
            (max(int(box[0]), 10), max(10, int(box[1]))),
            (min(int(box[2]), w - 10), min(int(box[3]), h - 10)),
            color, 2
        )
        # When the box starts near the top of the image, drop the caption
        # just inside the box instead of above it (where it would be cut off).
        if int(box[1]) < 10:
            text_coords = (max(int(box[0]), 10), max(10, int(box[1])) + 20)
        else:
            text_coords = (int(box[0]), int(box[1] - 5))

        cv2.putText(image, f"{classes[i]}({confidences[i]:0.2f})", text_coords,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                    lineType=cv2.LINE_AA)
    return image


def get_eigen_cam_image_overlay(boxes,
                                image_float_np,
                                grayscale_cam,
                                renormalize=False,
                                image_weight=0.7):
    """
    Overlay the CAM heatmap on the image.

    If ``renormalize`` is True, the CAM is rescaled to [0, 1] independently
    inside every bounding box and zeroed outside all boxes before blending;
    otherwise the raw ``grayscale_cam`` is blended directly.

    Args:
        boxes: predicted boxes [class_pred, conf, x, y, w, h] (normalized
               midpoint format); only used when ``renormalize`` is True.
        image_float_np: H x W x 3 float image in [0, 1].
        grayscale_cam: H x W CAM activation map.
        renormalize: per-box renormalization switch (see above).
        image_weight: blending weight of the original image.

    Returns:
        RGB uint8 overlay produced by ``show_cam_on_image``.
    """
    if renormalize:
        # Build one zero-padded, per-box-rescaled CAM per detection, then
        # merge them with an element-wise max.  The previous version ran the
        # max/scale/overlay steps *inside* the loop (recomputing them every
        # iteration) and raised UnboundLocalError when ``boxes`` was empty.
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        per_box_cams = []
        for box in boxes:
            [x1, y1, x2, y2] = get_box_coords(box[2:], image_float_np)
            cam = np.zeros_like(renormalized_cam)
            cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())
            per_box_cams.append(cam)
        if per_box_cams:
            renormalized_cam = np.max(np.float32(per_box_cams), axis=0)
        renormalized_cam = scale_cam_image(renormalized_cam)
        eigen_cam_image = show_cam_on_image(image_float_np,
                                            renormalized_cam,
                                            use_rgb=True,
                                            image_weight=image_weight)
    else:
        eigen_cam_image = show_cam_on_image(image_float_np,
                                            grayscale_cam,
                                            use_rgb=True,
                                            image_weight=image_weight)
    return eigen_cam_image

def export_image_with_bounding_box(image, boxes, labels, classes, confidences):
    """
    Render the detections onto ``image`` and return the result as a PIL image.

    Args:
        image: H x W x C uint8 numpy image (drawn on in place by
               ``draw_boxes``).
        boxes: predicted boxes [class_pred, conf, x, y, w, h] with normalized
               midpoint coordinates.
        labels / classes / confidences: parallel lists as produced by
               ``get_labels``.

    Returns:
        PIL.Image.Image with boxes and captions drawn.
    """
    # Convert each normalized midpoint box (elements 2:) to pixel corners.
    box_coords = [get_box_coords(box[2:], image) for box in boxes]
    return Image.fromarray(draw_boxes(box_coords, labels, classes, confidences, image))


def get_labels(boxes):
    """
    Extract class names, integer class labels, and confidence values from
    predicted bounding boxes.

    Args:
        boxes: iterable of 6-element boxes
               [class_pred, confidence, x, y, width, height].

    Returns:
        Tuple (class_names, class_labels, confidences) of three parallel
        lists, one entry per box.
    """
    # Dropped the unused ``cmap = plt.get_cmap("tab20b")`` dead code.
    class_labels = config.COCO_LABELS if config.DATASET == 'COCO' else config.PASCAL_CLASSES
    all_class_names = []
    all_class_labels = []
    all_confidences = []
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_idx = int(box[0])
        all_class_names.append(class_labels[class_idx])
        all_class_labels.append(class_idx)
        all_confidences.append(box[1])
    return all_class_names, all_class_labels, all_confidences


def predict(model, loader, anchors, device, thresh=0.6, iou_thresh=0.5):
    """
    Run prediction on one batch of images, apply non-maximum suppression to
    obtain object bounding boxes, and return the input tensor, its numpy
    copy, and per-image boxes, class names, class labels, and confidences.

    Args:
        model: YOLOv3 model producing predictions at 3 scales.
        loader: iterable yielding (images, targets) batches.
        anchors: per-scale anchor boxes, indexable as ``anchors[i]``.
        device: torch device for inference.
        thresh: objectness threshold for NMS.
        iou_thresh: IoU threshold for NMS.

    Returns:
        (x, x_np_float_array, boxes_list, classes_list, labels_list,
        confidences_list) — the last four are lists with one entry per image.
    """
    model.eval()
    # Fetch a single batch.  The previous version called
    # ``next(iter(loader))`` twice, re-creating the iterator and discarding
    # the first fetch for no benefit.
    x, _ = next(iter(loader))
    num_images = x.shape[0]
    x = x.to(device)
    boxes_list = []
    classes_list = []
    labels_list = []
    confidences_list = []
    with torch.no_grad():
        out = model(x)
        bboxes = [[] for _ in range(num_images)]
        for i in range(3):  # one prediction head per YOLOv3 scale
            S = out[i].shape[2]  # grid size at this scale
            boxes_scale_i = cells_to_bboxes(
                out[i], anchors[i], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    for i in range(num_images):
        nms_boxes = non_max_suppression(
            bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
        )
        classes, labels, confidences = get_labels(nms_boxes)
        boxes_list.append(nms_boxes)
        classes_list.append(classes)
        labels_list.append(labels)
        confidences_list.append(confidences)
    x_np_float_array = np.asarray(x.cpu())
    return x, x_np_float_array, boxes_list, classes_list, labels_list, confidences_list


def predict_single_image(input_tensor, model, anchors, device, thresh=0.6, iou_thresh=0.5):
    """
    Run prediction on a single image, apply non-maximum suppression to
    obtain object bounding boxes, and return the boxes together with the
    predicted class names, class labels, and confidence levels.

    Args:
        input_tensor: image batch tensor (the first image is used).
        model: YOLOv3 model producing predictions at 3 scales.
        anchors: per-scale anchor boxes, indexable as ``anchors[i]``.
        device: torch device for inference.
        thresh: objectness threshold for NMS.
        iou_thresh: IoU threshold for NMS.

    Returns:
        (nms_boxes, classes, labels, confidences) for the first image.
    """
    model.eval()
    input_tensor = input_tensor.to(device)
    with torch.no_grad():
        out = model(input_tensor)
        bboxes = [[] for _ in range(input_tensor.shape[0])]
        for i in range(3):  # one prediction head per YOLOv3 scale
            S = out[i].shape[2]  # grid size at this scale
            boxes_scale_i = cells_to_bboxes(
                out[i], anchors[i], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    nms_boxes = non_max_suppression(bboxes[0],
                                    iou_threshold=iou_thresh,
                                    threshold=thresh, box_format="midpoint")
    classes, labels, confidences = get_labels(nms_boxes)
    # Dropped the dead ``*_list`` accumulators: they were populated but the
    # function returned the scalars for the single image.
    return nms_boxes, classes, labels, confidences