Spaces:
Runtime error
Runtime error
| import project_path | |
| import os | |
| import cv2 | |
| import numpy as np | |
| import json | |
| from threading import Lock | |
| from contextlib import contextmanager | |
| import torch | |
| from torch.utils.data import Dataset | |
| from PIL import Image | |
| from datetime import datetime | |
| # assumes yolov5 on sys.path | |
| from lib.yolov5.utils.general import xyxy2xywh | |
| from lib.yolov5.utils.augmentations import letterbox | |
| from lib.yolov5.utils.dataloaders import create_dataloader as create_yolo_dataloader | |
| from backend.pyDIDSON import pyDIDSON | |
| from backend.aris import BEAM_WIDTH_DIR | |
| # use this flag to test the difference between direct ARIS dataloading and | |
| # using the jpeg compressed version. very slow. not much difference observed. | |
| TEST_JPG_COMPRESSION = False | |
| # # # # # # | |
| # Factory(ish) methods for DataLoader creation. Easy entry points to this module. | |
| # # # # # # | |
| def create_dataloader_aris(aris_filepath, beam_width_dir=BEAM_WIDTH_DIR, annotations_file=None, batch_size=32, stride=64, pad=0.5, img_size=896, rank=-1, world_size=1, workers=0, | |
| disable_output=False, cache_bg_frames=False, num_frames_bg_subtract=1000): | |
| """ | |
| Get a PyTorch Dataset and DataLoader for ARIS files with (optional) associated fisheye-formatted labels. | |
| """ | |
| print('dataset', datetime.now()) | |
| # Make sure only the first process in DDP process the dataset first, and the following others can use the cache | |
| # this is a no-op for a single-gpu machine | |
| with torch_distributed_zero_first(rank): | |
| dataset = YOLOARISBatchedDataset(aris_filepath, beam_width_dir, annotations_file, stride, pad, img_size, | |
| disable_output=disable_output, cache_bg_frames=cache_bg_frames, num_frames_bg_subtract=num_frames_bg_subtract) | |
| batch_size = min(batch_size, len(dataset)) | |
| nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers | |
| print('dataloader', datetime.now()) | |
| if not disable_output: | |
| print("dataset size", len(dataset)) | |
| print("dataset shape", dataset.shape) | |
| print("Num workers", nw) | |
| # sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None # if extending to multi-GPU inference, will need this | |
| dataloader = torch.utils.data.dataloader.DataLoader(dataset, | |
| batch_size=None, | |
| sampler=OnePerBatchSampler(data_source=dataset, batch_size=batch_size), | |
| num_workers=nw, | |
| pin_memory=True, | |
| collate_fn=collate_fn) | |
| print('done', datetime.now()) | |
| return dataloader, dataset | |
| def create_dataloader_frames(frames_path, batch_size=32, model_stride_max=32, | |
| pad=0.5, img_size=896, rank=-1, world_size=1, workers=0, disable_output=False): | |
| """ | |
| Create a DataLoader for a directory of frames without labels. | |
| Args: | |
| model_stride_max: use model.stride.max() | |
| """ | |
| gs = max(int(model_stride_max), 32) # grid size (max stride) | |
| return create_yolo_dataloader(frames_path, img_size, batch_size, gs, single_cls=False, augment=False, | |
| hyp=None, cache=None, rect=True, rank=rank, | |
| workers=workers, pad=pad)[0] | |
| def create_dataloader_frames_only(frames_path, batch_size=32, img_size=896, workers=0): | |
| """ | |
| Create a DataLoader for a directory of frames without labels. | |
| Args: | |
| model_stride_max: use model.stride.max() | |
| """ | |
| return YOLOFrameDataset(frames_path, img_size=img_size, batch_size=batch_size) | |
| # # # # # # | |
| # End factory(ish) methods | |
| # # # # # # | |
| import os | |
| import pandas as pd | |
| from torchvision.io import read_image | |
| import re | |
| class YOLOFrameDataset(Dataset): | |
| def __init__(self, img_dir, img_size=896, batch_size=32, stride=64, pad=0.5): | |
| self.img_dir = img_dir | |
| self.img_size = img_size | |
| self.files = os.listdir(img_dir) | |
| self.files = list(filter(lambda f: f[-4:] == ".jpg", self.files)) | |
| self.files.sort(key=lambda f: int(re.sub('\D', '', f))) | |
| temp_img = read_image(os.path.join(self.img_dir, self.files[0])) | |
| size = temp_img.shape | |
| self.ydim = size[1] | |
| self.xdim = size[2] | |
| n = len(self.files) | |
| aspect_ratio = self.ydim / self.xdim | |
| if aspect_ratio < 1: | |
| shape = [aspect_ratio, 1] | |
| elif aspect_ratio > 1: | |
| shape = [1, 1 / aspect_ratio] | |
| self.original_shape = (self.ydim, self.xdim) | |
| self.shape = np.ceil(np.array(shape) * img_size / stride + pad).astype(int) * stride | |
| self.batch_indices = [] | |
| for i in range(0, n, batch_size): | |
| self.batch_indices.append((i, min(n, i+batch_size))) | |
| def load_image(cls, img, img_size=896): | |
| """Loads and resizes 1 image from dataset, returns img, original hw, resized hw. | |
| Modified from ScaledYOLOv4.datasets.load_image() | |
| """ | |
| h0, w0 = img.shape[:2] | |
| h1, w1 = h0, w0 | |
| r = img_size / max(h0, w0) | |
| if r != 1: # always resize down, only resize up if training with augmentation | |
| interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR | |
| img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) | |
| h1, w1 = img.shape[:2] | |
| return img, (h0, w0), (h1, w1) # img, hw_original, hw_resized | |
| def __len__(self): | |
| return len(self.batch_indices) | |
| def __iter__(self): | |
| for batch_idx in self.batch_indices: | |
| batch = [] | |
| labels = None | |
| shapes = [] | |
| for i in range(batch_idx[0], batch_idx[1]): | |
| img_name = self.files[i] | |
| img_path = os.path.join(self.img_dir, img_name) | |
| image = Image.open(img_path) | |
| image = np.asarray(image) | |
| img, (h0, w0), (h, w) = self.load_image(image, img_size=self.img_size) | |
| # Letterbox | |
| img, ratio, pad = letterbox(img, self.shape, auto=False, scaleup=False) | |
| shape = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling | |
| img = img.transpose(2, 0, 1) # to -> C x H x W | |
| img = np.ascontiguousarray(img) | |
| img = torch.from_numpy(img) | |
| shapes.append(shape) | |
| batch.append(img) | |
| image = torch.stack(batch) | |
| yield (image, labels, shapes) | |
| class ARISBatchedDataset(Dataset): | |
| def __init__(self, aris_filepath, beam_width_dir, annotations_file, batch_size, num_frames_bg_subtract=1000, disable_output=False, | |
| cache_bg_frames=False): | |
| """ | |
| A PyTorch Dataset class for loading an ARIS file and (optional) associated fisheye-format labels. | |
| This class handles the ARIS frame extraction and 3-channel representation generation. | |
| It is called a "BatchedDataset" because it loads contiguous frames in self.batch_size chunks. | |
| ** The PyTorch sampler must be aware of this!! ** Use the OnePerBatchSampler in this module when using this Dataset. | |
| Args: | |
| cache_bg_frames: keep the frames used for bg subtraction stored in memory. careful of memory issues. only recommended | |
| for small values of num_frames_bg_subtract | |
| """ | |
| # open ARIS data stream - TODO: make sure this is one per worker | |
| self.data = open(aris_filepath, 'rb') | |
| self.data_lock = Lock() | |
| self.beam_width_dir = beam_width_dir | |
| self.disable_output = disable_output | |
| self.aris_filepath = aris_filepath | |
| self.cache_bg_frames = cache_bg_frames | |
| # get header info | |
| self.didson = pyDIDSON(self.aris_filepath, beam_width_dir=beam_width_dir) | |
| self.xdim = self.didson.info['xdim'] | |
| self.ydim = self.didson.info['ydim'] | |
| # disable automatic batching - do it ourselves, reading batch_size frames from | |
| # the ARIS file at a time | |
| self.batch_size = batch_size | |
| # load fisheye annotations | |
| if annotations_file is None: | |
| if not self.disable_output: | |
| print("Loading file with no labels.") | |
| self.start_frame = self.didson.info['startframe'] | |
| self.end_frame = self.didson.info['endframe'] or self.didson.info['numframes'] | |
| self.labels = None | |
| else: | |
| self._load_labels(annotations_file) | |
| # intiialize the background subtraction | |
| self.num_frames_bg_subtract = num_frames_bg_subtract | |
| self._init_bg_frame() | |
| def _init_bg_frame(self): | |
| """ | |
| Intialize bg frame for bg subtraction. | |
| Uses min(self.num_frames_bg_subtract, total_frames) frames to do mean subtraction. | |
| Caches these frames in self.extracted_frames for reuse. | |
| """ | |
| # ensure the number of frames used is a multiple of self.batch_size so we can cache them and retrieve full batches | |
| # add 1 extra frame to be used for optical flow calculation | |
| num_frames_bg = min(self.end_frame - self.start_frame, self.num_frames_bg_subtract // self.batch_size * self.batch_size + 1) | |
| if not self.disable_output: | |
| print("Initializing mean frame for background subtraction using", num_frames_bg, "frames...") | |
| frames_for_bg_subtract = self.didson.load_frames(start_frame=self.start_frame, end_frame=self.start_frame + num_frames_bg) | |
| ### NEW WAY ### | |
| # save memory (and time?) by computing these in a streaming fashion vs. in a big batch | |
| self.mean_blurred_frame = np.zeros([self.ydim, self.xdim], dtype=np.float32) | |
| max_blurred_frame = np.zeros([self.ydim, self.xdim], dtype=np.float32) | |
| for i in range(frames_for_bg_subtract.shape[0]): | |
| blurred = cv2.GaussianBlur( | |
| frames_for_bg_subtract[i], | |
| (5,5), | |
| 0) | |
| self.mean_blurred_frame += blurred | |
| max_blurred_frame = np.maximum(max_blurred_frame, np.abs(blurred)) | |
| self.mean_blurred_frame /= frames_for_bg_subtract.shape[0] | |
| max_blurred_frame -= self.mean_blurred_frame | |
| self.mean_normalization_value = np.max(max_blurred_frame) | |
| # cache these for later | |
| self.extracted_frames = [] | |
| # Because of the optical flow computation, we only go to end_frame - 1 | |
| next_blur = None | |
| for i in range(len(frames_for_bg_subtract) - 1): | |
| if next_blur is None: | |
| this_blur = ((cv2.GaussianBlur(frames_for_bg_subtract[i], (5,5), 0) - self.mean_blurred_frame) / self.mean_normalization_value + 1) / 2 | |
| else: | |
| this_blur = next_blur | |
| next_blur = ((cv2.GaussianBlur(frames_for_bg_subtract[i+1], (5,5), 0) - self.mean_blurred_frame) / self.mean_normalization_value + 1) / 2 | |
| frame_image = np.dstack([frames_for_bg_subtract[i], | |
| this_blur * 255, | |
| np.abs(next_blur - this_blur) * 255]).astype(np.uint8, copy=False) | |
| if TEST_JPG_COMPRESSION: | |
| from PIL import Image | |
| import os | |
| Image.fromarray(frame_image).save(f"tmp{i}.jpg", quality=95) | |
| frame_image = cv2.imread(f"tmp{i}.jpg")[:, :, ::-1] # BGR to RGB | |
| os.remove(f"tmp{i}.jpg") | |
| if self.cache_bg_frames: | |
| self.extracted_frames.append(frame_image) | |
| if not self.disable_output: | |
| print("Done initializing background frame.") | |
| def _load_labels(self, fisheye_json): | |
| """Load labels from a fisheye-formatted json file into self.labels in normalized | |
| xywh format. | |
| """ | |
| js = json.load(open(fisheye_json, 'r')) | |
| labels = [] | |
| for frame in js['frames']: | |
| l = [] | |
| for fish in frame['fish']: | |
| x, y, w, h = xyxy2xywh(fish['bbox']) | |
| cx = x + w/2.0 | |
| cy = y + h/2.0 | |
| # Each row is `class x_center y_center width height` format. (Normalized) | |
| l.append([0, cx, cy, w, h]) | |
| l = np.array(l, dtype=np.float32) | |
| if len(l) == 0: | |
| l = np.zeros((0, 5), dtype=np.float32) | |
| labels.append(l) | |
| self.labels = labels | |
| self.start_frame = js['start_frame'] | |
| self.end_frame = js['end_frame'] | |
| def __len__(self): | |
| # account for optical flow - we can't do the last frame | |
| return self.end_frame - self.start_frame - 1 | |
| def _postprocess(self, frame_images, frame_labels): | |
| raise NotImplementedError | |
| def __getitem__(self, idx): | |
| """ | |
| Return a numpy array representing this batch of frames and labels according to pyARIS frame extraction logic. | |
| This class returns a full batch rather than just 1 example, assuming a OnePerBatchSampler is used. | |
| """ | |
| final_idx = min(idx+self.batch_size, len(self)) | |
| frame_labels = self.labels[idx:final_idx] if self.labels else None | |
| # see if we have already cached this from bg subtraction | |
| # assumes len(self.extracted_frames) is a multiple of self.batch_size | |
| if idx+1 < len(self.extracted_frames): | |
| return self._postprocess(self.extracted_frames[idx:final_idx], frame_labels) | |
| else: | |
| frames = self.didson.load_frames(start_frame=self.start_frame+idx, end_frame=self.start_frame + final_idx + 1) | |
| blurred_frames = frames.astype(np.float32) | |
| for i in range(frames.shape[0]): | |
| blurred_frames[i] = cv2.GaussianBlur( | |
| blurred_frames[i], | |
| (5,5), | |
| 0 | |
| ) | |
| blurred_frames -= self.mean_blurred_frame | |
| blurred_frames /= self.mean_normalization_value | |
| blurred_frames += 1 | |
| blurred_frames /= 2 | |
| frame_images = np.stack([ frames[:-1], blurred_frames[:-1] * 255, np.abs(blurred_frames[1:] - blurred_frames[:-1]) * 255 ], axis=-1).astype(np.uint8, copy=False) | |
| if TEST_JPG_COMPRESSION: | |
| from PIL import Image | |
| import os | |
| new_frame_images = [] | |
| for image in frame_images: | |
| Image.fromarray(image).save(f"tmp{idx}.jpg", quality=95) | |
| image = cv2.imread(f"tmp{idx}.jpg")[:, :, ::-1] # BGR to RGB | |
| os.remove(f"tmp{idx}.jpg") | |
| new_frame_images.append(image) | |
| frame_images = new_frame_images | |
| return self._postprocess(frame_images, frame_labels) | |
| class YOLOARISBatchedDataset(ARISBatchedDataset): | |
| """An ARIS Dataset that works with YOLOv5 inference.""" | |
| def __init__(self, aris_filepath, beam_width_dir, annotations_file, stride=64, pad=0.5, img_size=896, batch_size=32, | |
| disable_output=False, cache_bg_frames=False, num_frames_bg_subtract=1000): | |
| super().__init__(aris_filepath, beam_width_dir, annotations_file, batch_size, disable_output=disable_output, cache_bg_frames=cache_bg_frames, num_frames_bg_subtract=num_frames_bg_subtract) | |
| # compute shapes for letterbox | |
| aspect_ratio = self.ydim / self.xdim | |
| if aspect_ratio < 1: | |
| shape = [aspect_ratio, 1] | |
| elif aspect_ratio > 1: | |
| shape = [1, 1 / aspect_ratio] | |
| self.original_shape = (self.ydim, self.xdim) | |
| self.shape = np.ceil(np.array(shape) * img_size / stride + pad).astype(int) * stride | |
| def load_image(cls, img, img_size=896): | |
| """Loads and resizes 1 image from dataset, returns img, original hw, resized hw. | |
| Modified from ScaledYOLOv4.datasets.load_image() | |
| """ | |
| h0, w0 = img.shape[:2] # orig hw | |
| r = img_size / max(h0, w0) # resize image to img_size | |
| if r != 1: # always resize down, only resize up if training with augmentation | |
| interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR | |
| img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) | |
| return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized | |
| def _postprocess(self, frame_images, frame_labels): | |
| """ | |
| Return a batch of data in the format used by ScaledYOLOv4. | |
| That is, a list of tuples, on tuple per image in the batch: | |
| [ | |
| (img ->torch.Tensor, | |
| labels ->torch.Tensor, | |
| shapes ->tuple describing image original dimensions and scaled/padded dimensions | |
| ), | |
| ... | |
| ] | |
| """ | |
| outputs = [] | |
| frame_labels = frame_labels or [ None for _ in frame_images ] | |
| for image, x in zip(frame_images, frame_labels): | |
| img, (h0, w0), (h, w) = self.load_image(image) | |
| # Letterbox | |
| img, ratio, pad = letterbox(img, self.shape, auto=False, scaleup=False) | |
| shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling | |
| img = img.transpose(2, 0, 1) # to -> C x H x W | |
| img = np.ascontiguousarray(img) | |
| # Load labels | |
| # Convert from normalized xywh to pixel xyxy format in order to add padding from letterbox | |
| labels = [] | |
| if x is not None and x.size > 0: | |
| labels = x.copy() | |
| labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width | |
| labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height | |
| labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0] | |
| labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1] | |
| # convert back to normalized xywh with padding | |
| nL = len(labels) # number of labels | |
| labels_out = torch.zeros((nL, 6)) | |
| if nL: | |
| labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh | |
| labels[:, [2, 4]] /= img.shape[1] # normalized height 0-1 | |
| labels[:, [1, 3]] /= img.shape[2] # normalized width 0-1 | |
| labels_out[:, 1:] = torch.from_numpy(labels) | |
| outputs.append( (torch.from_numpy(img), labels_out, shapes) ) | |
| return outputs | |
| def torch_distributed_zero_first(local_rank: int): | |
| """ | |
| Decorator to make all processes in distributed training wait for each local_master to do something. | |
| """ | |
| if local_rank not in [-1, 0]: | |
| torch.distributed.barrier() | |
| yield | |
| if local_rank == 0: | |
| torch.distributed.barrier() | |
| class OnePerBatchSampler(torch.utils.data.Sampler): | |
| """Yields the first index of each batch, given a batch size. | |
| In other words, returns multiples of self.batch_size up to the size of the Dataset. | |
| This is a workaround for Pytorch's standard batch creation that allows us to manually | |
| select contiguous segments of an ARIS clip for each batch. | |
| """ | |
| def __init__(self, data_source, batch_size): | |
| self.data_source = data_source | |
| self.batch_size = batch_size | |
| def __iter__(self): | |
| idxs = [i*self.batch_size for i in range(len(self))] | |
| return iter(idxs) | |
| def __len__(self): | |
| return len(self.data_source) // self.batch_size | |
| def collate_fn(batch): | |
| """See YOLOv5.utils.datasets.collate_fn""" | |
| if not len(batch): | |
| print("help!") | |
| print(batch) | |
| img, label, shapes = zip(*batch) # transposed | |
| for i, l in enumerate(label): | |
| l[:, 0] = i # add target image index for build_targets() | |
| return torch.stack(img, 0), torch.cat(label, 0), shapes |