| """ | |
| Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR) | |
| Copyright(c) 2023 lyuwenyu. All Rights Reserved. | |
| """ | |
import os
from typing import Callable, Optional

import torch
import torchvision
import torchvision.transforms.functional as TVF
from PIL import Image

try:
    from defusedxml.ElementTree import parse as ET_parse
except ImportError:
    from xml.etree.ElementTree import parse as ET_parse

from ._dataset import DetDataset
from .._misc import convert_to_tv_tensor
from ...core import register


@register()
class VOCDetection(torchvision.datasets.VOCDetection, DetDataset):
    # Names listed in __inject__ (here the transform pipeline) are resolved
    # and injected by the config system via `register`.
    __inject__ = ['transforms', ]

    def __init__(
        self,
        root: str,
        ann_file: str = "trainval.txt",
        label_file: str = "label_list.txt",
        transforms: Optional[Callable] = None,
    ):
        with open(os.path.join(root, ann_file), 'r') as f:
            lines = [x.strip() for x in f.readlines()]
            lines = [x.split(' ') for x in lines]

        self.images = [os.path.join(root, lin[0]) for lin in lines]
        self.targets = [os.path.join(root, lin[1]) for lin in lines]
        assert len(self.images) == len(self.targets)

        with open(os.path.join(root, label_file), 'r') as f:
            labels = [lab.strip() for lab in f.readlines()]

        self.transforms = transforms
        self.labels_map = {lab: i for i, lab in enumerate(labels)}
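
    # Expected on-disk layout (inferred from the parsing in __init__; the file
    # names are the defaults above, the directory names are illustrative):
    #
    #   root/
    #       trainval.txt      # each line: "<image path> <xml path>", relative to root
    #       label_list.txt    # one class name per line
    #       JPEGImages/       # referenced by the first column of trainval.txt
    #       Annotations/      # referenced by the second column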

    def __getitem__(self, index: int):
        image, target = self.load_item(index)
        if self.transforms is not None:
            image, target, _ = self.transforms(image, target, self)
        # target["orig_size"] = torch.tensor(TVF.get_image_size(image))
        return image, target
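
    # A compatible `transforms` callable (inferred from the call above) has the
    # signature transforms(image, target, dataset) -> (image, target, extra);
    # the dataset itself is passed so transforms can read its metadata, and the
    # third return value is discarded here.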

    def load_item(self, index: int):
        image = Image.open(self.images[index]).convert("RGB")
        # `self.annotations` is torchvision's alias for `self.targets`,
        # i.e. the XML annotation paths collected in __init__.
        target = self.parse_voc_xml(ET_parse(self.annotations[index]).getroot())

        output = {}
        output["image_id"] = torch.tensor([index])
        for k in ['area', 'boxes', 'labels', 'iscrowd']:
            output[k] = []

        for blob in target['annotation']['object']:
            # Relies on the XML field order (xmin, ymin, xmax, ymax) being
            # preserved by parse_voc_xml; Python dicts keep insertion order.
            box = [float(v) for v in blob['bndbox'].values()]
            output["boxes"].append(box)
            output["labels"].append(blob['name'])
            output["area"].append((box[2] - box[0]) * (box[3] - box[1]))
            output["iscrowd"].append(0)

        w, h = image.size
        boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4)
        output['boxes'] = convert_to_tv_tensor(boxes, 'boxes', box_format='xyxy', spatial_size=[h, w])
        output['labels'] = torch.tensor([self.labels_map[lab] for lab in output["labels"]])
        output['area'] = torch.tensor(output['area'])
        output["iscrowd"] = torch.tensor(output["iscrowd"])
        output["orig_size"] = torch.tensor([w, h])

        return image, output
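

# Minimal usage sketch, assuming the on-disk layout documented above; the root
# path is a hypothetical placeholder. Because this module uses relative imports,
# it must be run as part of its package (e.g. `python -m <package>.<module>`).
if __name__ == "__main__":
    dataset = VOCDetection(root="/data/voc_root/")
    image, target = dataset[0]
    print(image.size)              # PIL (width, height)
    print(target["boxes"].shape)   # [num_objects, 4] in xyxy format
    print(target["labels"])        # class indices from label_list.txt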