| """ |
Creates a PyTorch dataset to load the Pascal VOC & MS COCO datasets
| """ |
|
|
| import os |
|
|
| import numpy as np |
| import pandas as pd |
| import torch |
| from PIL import Image, ImageFile |
| from torch.utils.data import Dataset |
|
|
| ImageFile.LOAD_TRUNCATED_IMAGES = True |
|
|
|
|
class YOLODataset(Dataset):
    """Dataset yielding (image, multi-scale targets) pairs for YOLOv3 training.

    Each CSV row names an image file (column 0) and a label file (column 1).
    Label files hold one box per line as [class, cx, cy, w, h] with
    coordinates normalized to [0, 1]; on load they are rolled so the class
    comes last: [cx, cy, w, h, class].
    """

    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=(13, 26, 52),
        transform=None,
        load_mosaic=True,
    ):
        """
        Args:
            csv_file: path to a CSV with image filenames in column 0 and
                label filenames in column 1.
            img_dir: directory containing the images.
            label_dir: directory containing the label text files.
            anchors: nested sequence of shape
                [num_scales, anchors_per_scale, 2] with (w, h) anchor priors
                normalized to [0, 1] (same units as the label boxes).
            image_size: square input resolution; stored but not used here.
            S: grid size per prediction scale, coarse to fine.
                (Tuple default avoids the shared-mutable-default pitfall;
                behaves identically to the former list for iteration/indexing.)
            transform: optional albumentations-style callable invoked as
                transform(image=..., bboxes=...) returning a dict with the
                same keys.
            load_mosaic: stored flag; not used in this class — presumably
                reserved for mosaic augmentation (TODO confirm with caller).
        """
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.transform = transform
        self.S = S
        self.load_mosaic = load_mosaic

        # Shape: [num_scales, anchors_per_scale, 2]
        self.anchors = torch.tensor(anchors)
        self.num_anchors_per_scale = self.anchors.shape[1]

    def __len__(self):
        """Number of samples (rows in the CSV)."""
        return len(self.annotations)

    @staticmethod
    def iou(box, anchors):
        """Width/height IoU between one box and every anchor.

        Both box and anchors are treated as centered at the same point, so
        the intersection is the product of the element-wise minima.

        Args:
            box: tensor of shape [2] -> (w, h).
            anchors: tensor of shape [num_scales, num_anchors, 2]
                (the trailing 2 is width and height).

        Returns:
            Tensor of shape [num_scales, num_anchors] with IoU values.
        """
        intersection = torch.prod(torch.min(box, anchors), dim=-1)
        union = torch.prod(box) + torch.prod(anchors, dim=-1) - intersection
        return intersection / union

    def __getitem__(self, index):
        """Load one image and build its per-scale training targets.

        Returns:
            (image, targets): targets is a list with one tensor per scale,
            each of shape [num_anchors_per_scale, S, S, 6] where the last
            dim is [objectness, cx, cy, w, h, class]; cx/cy are offsets
            within the assigned grid cell, w/h are in grid units.
        """
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])

        # Labels are [class, cx, cy, w, h]; roll by 4 so class comes last:
        # [cx, cy, w, h, class]. ndmin=2 keeps single-box files 2-D.
        bboxes = np.roll(
            np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
        )
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))

        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]

        # One zeroed target tensor per scale.
        # 6 = [objectness, cx, cy, w, h, class]
        targets = [torch.zeros((self.num_anchors_per_scale, S, S, 6)) for S in self.S]

        for bbox in bboxes:
            # Match this box's (w, h) against every anchor at every scale.
            iou = self.iou(torch.tensor(bbox[2:4]), self.anchors)

            # Index of the best-matching anchor on each scale.
            idx = torch.argsort(iou, descending=True, dim=-1)
            idx = idx[:, 0].tolist()

            dimensions, class_ = np.array(bbox[:-1]), bbox[-1]

            for scale_idx, anchor_id in enumerate(idx):
                scale_dim = self.S[scale_idx]
                scale_cx, scale_cy, scale_w, scale_h = dimensions * scale_dim

                # Clamp so a normalized coordinate of exactly 1.0 does not
                # index one past the last cell: int(1.0 * S) == S would be
                # out of bounds for a grid indexed 0..S-1.
                row = min(int(scale_cy), scale_dim - 1)
                col = min(int(scale_cx), scale_dim - 1)

                # Cell-relative center offsets.
                scale_cx = scale_cx - col
                scale_cy = scale_cy - row

                box_target = torch.tensor(
                    [1, scale_cx, scale_cy, scale_w, scale_h, class_]
                )

                # NOTE: a later box assigned to the same (anchor, row, col)
                # slot silently overwrites an earlier one.
                targets[scale_idx][anchor_id, row, col] = box_target

        return image, targets
|
|
|
|
if __name__ == "__main__":
    # Smoke-test: build a tiny dataset from the project configuration.
    from src.run.yolov3 import config

    IMAGE_SIZE = config.IMAGE_SIZE
    # Grid sizes for the three detection strides (32, 16, 8).
    grid_sizes = [IMAGE_SIZE // stride for stride in (32, 16, 8)]
    train_dataset = YOLODataset(
        config.DATASET + "/2examples.csv",
        transform=config.train_transforms,
        S=grid_sizes,
        img_dir=config.IMG_DIR,
        label_dir=config.LABEL_DIR,
        anchors=config.ANCHORS,
    )
|
|