Spaces:
Build error
Build error
File size: 8,198 Bytes
60dd03b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
import torch
import torchvision
from dl_hub.YOLO_V3.yolo_v3_utils.utils import non_max_suppression, plot_image
from pytorch_grad_cam import AblationCAM, EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import numpy as np
from dl_hub.YOLO_V3 import config
from dl_hub.YOLO_V3.yolo_v3_utils.utils import (
cells_to_bboxes,
)
# All classes currently share a single color (red); the commented-out line
# below would instead assign a different random color to each class.
#COLORS = np.random.uniform(0, 255, size=(len(config.PASCAL_CLASSES), 3))
COLORS = np.tile(np.array([[255.,0.,0.]]),(len(config.PASCAL_CLASSES),1))
class YOLOv3Target:
    """
    Dummy target so the EigenCAM library doesn't throw an exception.

    EigenCAM does not actually use per-class targets, but the
    pytorch_grad_cam API expects a targets argument; this empty class
    fills that slot.
    """
    pass
def get_box_coords(box, im):
    """
    Map a normalized midpoint box (cx, cy, w, h) to pixel-corner
    coordinates on the image.

    Parameters
    ----------
    box : sequence of 4 floats in [0, 1]
        Center x, center y, width, height relative to image size.
    im : np.ndarray
        Image array of shape (height, width, channels).

    Returns
    -------
    list[int]
        [upper_left_x, upper_left_y, lower_right_x, lower_right_y] in pixels.
    """
    img_height, img_width, _ = im.shape
    x1 = int((box[0] - box[2] / 2) * img_width)
    y1 = int((box[1] - box[3] / 2) * img_height)
    # Lower-right corner is derived from the upper-left corner plus the
    # (truncated) pixel width/height, matching the original rounding.
    x2 = x1 + int(box[2] * img_width)
    y2 = y1 + int(box[3] * img_height)
    return [x1, y1, x2, y2]
def draw_boxes(boxes, labels, classes, confidences, image):
    """
    Draw bounding boxes and class labels on an image with OpenCV.

    Parameters
    ----------
    boxes : list of [x1, y1, x2, y2]
        Pixel-corner coordinates of each box.
    labels : list[int]
        Class indices, used to pick a color from COLORS.
    classes : list[str]
        Class names shown next to each box.
    confidences : list[float]
        Confidence score shown next to each class name.
    image : np.ndarray
        Image of shape (height, width, channels); modified in place.

    Returns
    -------
    np.ndarray
        The same image array with boxes and text drawn on it.
    """
    # BUG FIX: numpy image shape is (height, width, channels). The original
    # unpacked it as (w, h, _), so x coordinates were clamped against the
    # image height and y coordinates against the width.
    h, w, _ = image.shape
    for i, box in enumerate(boxes):
        color = tuple(COLORS[labels[i]])
        # Keep the rectangle at least 10 px inside the image borders so the
        # box edges stay visible: x clamps against width, y against height.
        cv2.rectangle(
            image,
            (max(int(box[0]), 10), max(10, int(box[1]))),
            (min(int(box[2]), w - 10), min(int(box[3]), h - 10)),
            color, 2
        )
        # When the box starts near the top edge the label would be drawn
        # off-image; move it just below the (clamped) top edge instead.
        if int(box[1]) < 10:
            text_coords = (max(int(box[0]), 10), max(10, int(box[1])) + 20)
        else:
            text_coords = (int(box[0]), int(box[1] - 5))
        cv2.putText(image, f"{classes[i]}({confidences[i]:0.2f})", text_coords,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                    lineType=cv2.LINE_AA)
    return image
def get_eigen_cam_image_overlay(boxes,
                                image_float_np,
                                grayscale_cam,
                                renormalize=False,
                                image_weight=0.7):
    """
    Blend the CAM heatmap with the input image.

    If ``renormalize`` is True, the CAM is rescaled to [0, 1] inside every
    bounding box and zeroed everywhere outside the boxes before blending.

    Parameters
    ----------
    boxes : list
        Predicted boxes; only box[2:] (normalized midpoint x, y, w, h)
        is used here, via get_box_coords.
    image_float_np : np.ndarray
        Float image in [0, 1] of shape (H, W, 3).
    grayscale_cam : np.ndarray
        CAM activation map of shape (H, W).
    renormalize : bool
        Renormalize the CAM per bounding box (see above).
    image_weight : float
        Blending weight forwarded to show_cam_on_image.

    Returns
    -------
    np.ndarray
        The RGB CAM overlay image.
    """
    # BUG FIX: guard against an empty box list — np.max over an empty
    # array would raise; fall back to the plain overlay instead.
    if renormalize and boxes:
        per_box_cams = []
        for box in boxes:
            [x1, y1, x2, y2] = get_box_coords(box[2:], image_float_np)
            # Zero everywhere except inside this box, where the CAM is
            # rescaled to the full [0, 1] range.
            cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
            cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())
            per_box_cams.append(cam)
        # Combine the per-box maps with an element-wise max, then rescale.
        renormalized_cam = np.max(np.float32(per_box_cams), axis=0)
        cam_to_show = scale_cam_image(renormalized_cam)
    else:
        cam_to_show = grayscale_cam
    return show_cam_on_image(image_float_np,
                             cam_to_show,
                             use_rgb=True,
                             image_weight=image_weight)
def export_image_with_bounding_box(image, boxes, labels, classes, confidences):
    """
    Render predicted boxes onto an image and return the result as a PIL image.

    Parameters
    ----------
    image : np.ndarray
        Image of shape (height, width, channels); drawn on in place.
    boxes : list
        Predicted boxes; box[2:] holds the normalized midpoint coordinates
        converted to pixel corners via get_box_coords.
    labels : list[int]
        Class indices.
    classes : list[str]
        Class names.
    confidences : list[float]
        Confidence scores.

    Returns
    -------
    PIL.Image.Image
        The annotated image.
    """
    # Removed leftover debug prints that dumped every box to stdout.
    box_coords = [get_box_coords(box[2:], image) for box in boxes]
    return Image.fromarray(draw_boxes(box_coords, labels, classes, confidences, image))
def get_labels(boxes):
    """
    Extract class names, integer class labels, and confidence values from
    predicted bounding boxes.

    Parameters
    ----------
    boxes : iterable
        Each box is a 6-element sequence:
        (class prediction, confidence, x, y, width, height).

    Returns
    -------
    tuple
        (class names list[str], class labels list[int],
         confidences list[float]).
    """
    # Removed unused `cmap = plt.get_cmap("tab20b")` left over from an
    # earlier per-class-color scheme.
    class_labels = config.COCO_LABELS if config.DATASET == 'COCO' else config.PASCAL_CLASSES
    all_class_names = []
    all_class_labels = []
    all_confidences = []
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_idx = int(box[0])
        all_class_names.append(class_labels[class_idx])
        all_class_labels.append(class_idx)
        all_confidences.append(box[1])
    return all_class_names, all_class_labels, all_confidences
def predict(model, loader, anchors, device, thresh=0.6, iou_thresh=0.5):
    """
    Run inference on one batch from the loader and apply non-maximum
    suppression to obtain per-image bounding boxes.

    Parameters
    ----------
    model : torch.nn.Module
        YOLOv3 model producing predictions at three scales.
    loader : iterable
        Data loader yielding (images, targets) batches.
    anchors : sequence
        Per-scale anchors, indexable as anchors[0..2].
    device : torch.device or str
        Device to run inference on.
    thresh : float
        Confidence threshold for NMS.
    iou_thresh : float
        IoU threshold for NMS.

    Returns
    -------
    tuple
        (input tensor, numpy copy of the inputs, per-image NMS boxes,
         per-image class names, per-image class labels,
         per-image confidences).
    """
    model.eval()
    # BUG FIX: the original called `next(iter(loader))` twice, creating two
    # iterators and discarding the first fetched batch. One fetch suffices.
    x, y = next(iter(loader))
    num_images = x.shape[0]
    x = x.to(device)
    boxes_list = []
    classes_list = []
    labels_list = []
    confidences_list = []
    with torch.no_grad():
        out = model(x)
        # Collect boxes from all three prediction scales per image.
        bboxes = [[] for _ in range(num_images)]
        for i in range(3):
            S = out[i].shape[2]  # grid size at this scale
            boxes_scale_i = cells_to_bboxes(
                out[i], anchors[i], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    for i in range(num_images):
        nms_boxes = non_max_suppression(
            bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
        )
        classes, labels, confidences = get_labels(nms_boxes)
        boxes_list.append(nms_boxes)
        classes_list.append(classes)
        labels_list.append(labels)
        confidences_list.append(confidences)
    x_np_float_array = np.asarray(x.cpu())
    return x, x_np_float_array, boxes_list, classes_list, labels_list, confidences_list
def predict_single_image(input_tensor, model, anchors, device, thresh=0.6, iou_thresh=0.5):
    """
    Run inference on a single image tensor and apply non-maximum
    suppression to obtain its bounding boxes.

    Parameters
    ----------
    input_tensor : torch.Tensor
        Batched input of shape (1, C, H, W); only the first image's
        boxes are returned.
    model : torch.nn.Module
        YOLOv3 model producing predictions at three scales.
    anchors : sequence
        Per-scale anchors, indexable as anchors[0..2].
    device : torch.device or str
        Device to run inference on.
    thresh : float
        Confidence threshold for NMS.
    iou_thresh : float
        IoU threshold for NMS.

    Returns
    -------
    tuple
        (NMS boxes, class names, class labels, confidences) for the image.
    """
    # Removed dead code: per-image accumulator lists that were filled but
    # never returned, plus unused shape unpacks.
    model.eval()
    input_tensor = input_tensor.to(device)
    with torch.no_grad():
        out = model(input_tensor)
        bboxes = [[] for _ in range(input_tensor.shape[0])]
        # Merge boxes from all three prediction scales.
        for i in range(3):
            S = out[i].shape[2]  # grid size at this scale
            boxes_scale_i = cells_to_bboxes(
                out[i], anchors[i], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    nms_boxes = non_max_suppression(bboxes[0],
                                    iou_threshold=iou_thresh,
                                    threshold=thresh, box_format="midpoint")
    classes, labels, confidences = get_labels(nms_boxes)
    return nms_boxes, classes, labels, confidences
|