Spaces:
Sleeping
Sleeping
| import torch | |
| import torchvision | |
| from dl_hub.YOLO_V3.yolo_v3_utils.utils import non_max_suppression, plot_image | |
| from pytorch_grad_cam import AblationCAM, EigenCAM | |
| from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image | |
| from PIL import Image | |
| import cv2 | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from dl_hub.YOLO_V3 import config | |
| from dl_hub.YOLO_V3.yolo_v3_utils.utils import ( | |
| cells_to_bboxes, | |
| ) | |
# One drawing color per class. All classes currently share pure red (RGB);
# to give every class a distinct random color instead, swap in:
#   COLORS = np.random.uniform(0, 255, size=(len(config.PASCAL_CLASSES), 3))
COLORS = np.repeat(np.array([[255.0, 0.0, 0.0]]), len(config.PASCAL_CLASSES), axis=0)
class YOLOv3Target:
    """Placeholder target.

    The pytorch_grad_cam API expects a targets argument; supplying this
    dummy class keeps the library from raising an exception.
    """

    pass
def get_box_coords(box, im):
    """Map a normalized midpoint box to pixel corner coordinates.

    Args:
        box: sequence of four floats in [0, 1] — center x, center y,
            box width, box height (relative to the image size).
        im: image array of shape (height, width, channels).

    Returns:
        [x1, y1, x2, y2]: upper-left and lower-right pixel coordinates.
    """
    img_h, img_w, _ = im.shape
    cx, cy, bw, bh = box[0], box[1], box[2], box[3]
    x1 = int((cx - bw / 2) * img_w)
    y1 = int((cy - bh / 2) * img_h)
    x2 = x1 + int(bw * img_w)
    y2 = y1 + int(bh * img_h)
    return [x1, y1, x2, y2]
def draw_boxes(boxes, labels, classes, confidences, image):
    """Draw bounding boxes and class labels onto an image in place.

    Args:
        boxes: list of pixel-coordinate boxes [x1, y1, x2, y2].
        labels: integer class indices, used to pick a color from COLORS.
        classes: class-name strings shown next to each box.
        confidences: confidence scores shown next to each class name.
        image: H x W x C image array; modified in place.

    Returns:
        The same image array with the boxes and labels drawn.
    """
    # BUG FIX: numpy image shape is (height, width, channels). The original
    # unpacked it as (w, h, _), so x-coordinates were clamped by the image
    # height and y-coordinates by the width on non-square images.
    h, w, _ = image.shape
    for i, box in enumerate(boxes):
        color = tuple(COLORS[labels[i]])
        # Clamp the rectangle at least 10 px inside the image so the border
        # stays visible even for boxes touching the edges.
        cv2.rectangle(
            image,
            (max(int(box[0]), 10), max(10, int(box[1]))),
            (min(int(box[2]), w - 10), min(int(box[3]), h - 10)),
            color, 2
        )
        # Place the label inside the box when it starts near the top edge,
        # otherwise just above the box.
        if int(box[1]) < 10:
            text_coords = (max(int(box[0]), 10), max(10, int(box[1])) + 20)
        else:
            text_coords = (int(box[0]), int(box[1] - 5))
        cv2.putText(image, f"{classes[i]}({confidences[i]:0.2f})", text_coords,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                    lineType=cv2.LINE_AA)
    return image
def get_eigen_cam_image_overlay(boxes,
                                image_float_np,
                                grayscale_cam,
                                renormalize=False,
                                image_weight=0.7):
    """Overlay the CAM heatmap on the image, optionally per bounding box.

    If ``renormalize`` is True, the CAM is rescaled to [0, 1] independently
    inside every bounding box and zeroed outside the boxes; otherwise the
    raw ``grayscale_cam`` is blended over the whole image.

    Args:
        boxes: predicted boxes [class_pred, conf, cx, cy, w, h] (normalized).
        image_float_np: float RGB image in [0, 1], shape (H, W, 3).
        grayscale_cam: CAM activation map, shape (H, W).
        renormalize: renormalize the CAM inside each box when True.
        image_weight: blending weight of the original image in the overlay.

    Returns:
        The RGB overlay image produced by show_cam_on_image.
    """
    if not renormalize:
        return show_cam_on_image(image_float_np,
                                 grayscale_cam,
                                 use_rgb=True,
                                 image_weight=image_weight)
    per_box_cams = []
    for box in boxes:
        x1, y1, x2, y2 = get_box_coords(box[2:], image_float_np)
        cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())
        per_box_cams.append(cam)
    if per_box_cams:
        renormalized_cam = np.max(np.float32(per_box_cams), axis=0)
    else:
        # BUG FIX: with no detections, np.max over an empty sequence raised;
        # fall back to an all-zero CAM instead.
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
    renormalized_cam = scale_cam_image(renormalized_cam)
    return show_cam_on_image(image_float_np,
                             renormalized_cam,
                             use_rgb=True,
                             image_weight=image_weight)
def export_image_with_bounding_box(image, boxes, labels, classes, confidences):
    """Draw predicted boxes on the image and return it as a PIL image.

    Args:
        image: H x W x C image array; modified in place by draw_boxes.
        boxes: predicted boxes [class_pred, conf, cx, cy, w, h] (normalized).
        labels: integer class indices (parallel to boxes).
        classes: class-name strings (parallel to boxes).
        confidences: confidence scores (parallel to boxes).

    Returns:
        PIL.Image.Image of the annotated image.
    """
    # Convert each normalized midpoint box to pixel corner coordinates.
    # (Debug prints removed.)
    box_coords = [get_box_coords(box[2:], image) for box in boxes]
    annotated = draw_boxes(box_coords, labels, classes, confidences, image)
    return Image.fromarray(annotated)
def get_labels(boxes):
    """Extract class names, integer labels, and confidences from boxes.

    Args:
        boxes: iterable of 6-element boxes
            [class_pred, confidence, x, y, width, height].

    Returns:
        (class_names, class_labels, confidences): three parallel lists.
    """
    # Fixed dead code: the original also built an unused matplotlib colormap.
    class_names_table = config.COCO_LABELS if config.DATASET == 'COCO' else config.PASCAL_CLASSES
    all_class_names = []
    all_class_labels = []
    all_confidences = []
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_idx = int(box[0])
        all_class_names.append(class_names_table[class_idx])
        all_class_labels.append(class_idx)
        all_confidences.append(box[1])
    return all_class_names, all_class_labels, all_confidences
def predict(model, loader, anchors, device, thresh=0.6, iou_thresh=0.5):
    """Run prediction on one batch and apply non-maximum suppression.

    Args:
        model: YOLOv3 model producing outputs at 3 scales.
        loader: data loader yielding (images, targets) batches.
        anchors: per-scale anchor boxes, indexable as anchors[0..2].
        device: torch device to run inference on.
        thresh: objectness/confidence threshold for NMS.
        iou_thresh: IoU threshold for NMS.

    Returns:
        (x, x_np_float_array, boxes_list, classes_list, labels_list,
        confidences_list): the input tensor, its numpy copy, and per-image
        NMS boxes, class names, class labels, and confidences.
    """
    model.eval()
    # BUG FIX: next(iter(loader)) was called twice in a row; each call builds
    # a fresh iterator, so the second call merely re-fetched the same batch.
    x, _ = next(iter(loader))
    num_images = x.shape[0]
    x = x.to(device)
    boxes_list = []
    classes_list = []
    labels_list = []
    confidences_list = []
    with torch.no_grad():
        out = model(x)
        bboxes = [[] for _ in range(num_images)]
        for scale_idx in range(3):
            # out[scale_idx] has shape (batch, anchors, S, S, preds); S is the
            # grid size at this scale.
            S = out[scale_idx].shape[2]
            boxes_scale_i = cells_to_bboxes(
                out[scale_idx], anchors[scale_idx], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    for i in range(num_images):
        nms_boxes = non_max_suppression(
            bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
        )
        classes, labels, confidences = get_labels(nms_boxes)
        boxes_list.append(nms_boxes)
        classes_list.append(classes)
        labels_list.append(labels)
        confidences_list.append(confidences)
    x_np_float_array = np.asarray(x.cpu())
    return x, x_np_float_array, boxes_list, classes_list, labels_list, confidences_list
def predict_single_image(input_tensor, model, anchors, device, thresh=0.6, iou_thresh=0.5):
    """Run prediction on a single image and apply non-maximum suppression.

    Args:
        input_tensor: image batch tensor; only the first image's boxes
            are returned.
        model: YOLOv3 model producing outputs at 3 scales.
        anchors: per-scale anchor boxes, indexable as anchors[0..2].
        device: torch device to run inference on.
        thresh: objectness/confidence threshold for NMS.
        iou_thresh: IoU threshold for NMS.

    Returns:
        (nms_boxes, classes, labels, confidences) for the first image.
    """
    # Cleaned up: removed dead accumulator lists that were filled but never
    # returned, plus several unused locals.
    model.eval()
    input_tensor = input_tensor.to(device)
    with torch.no_grad():
        out = model(input_tensor)
        bboxes = [[] for _ in range(input_tensor.shape[0])]
        for scale_idx in range(3):
            # out[scale_idx] has shape (batch, anchors, S, S, preds); S is the
            # grid size at this scale.
            S = out[scale_idx].shape[2]
            boxes_scale_i = cells_to_bboxes(
                out[scale_idx], anchors[scale_idx], S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box
    nms_boxes = non_max_suppression(bboxes[0],
                                    iou_threshold=iou_thresh,
                                    threshold=thresh, box_format="midpoint")
    classes, labels, confidences = get_labels(nms_boxes)
    return nms_boxes, classes, labels, confidences