Add files using upload-large-folder tool
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- .gitattributes +7 -0
- Groma/mmdet/models/backbones/__pycache__/csp_darknet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/darknet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/detectors_resnet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/detectors_resnext.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/hourglass.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/hrnet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/mobilenet_v2.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/regnet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/res2net.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/resnest.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/resnet.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/resnext.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/ssd_vgg.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/swin.cpython-39.pyc +0 -0
- Groma/mmdet/models/backbones/__pycache__/trident_resnet.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/__init__.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/accuracy.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/ae_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/balanced_l1_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/dice_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/focal_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/gaussian_focal_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/gfocal_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/ghm_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/iou_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/kd_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/mse_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/pisa_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/seesaw_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/smooth_l1_loss.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/utils.cpython-39.pyc +0 -0
- Groma/mmdet/models/losses/__pycache__/varifocal_loss.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/collect_env.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/contextmanagers.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/logger.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/misc.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/setup_env.cpython-39.pyc +0 -0
- Groma/mmdet/utils/__pycache__/util_mixins.cpython-39.pyc +0 -0
- OpenSeeD/datasets/__init__.py +2 -0
- OpenSeeD/datasets/build.py +638 -0
- OpenSeeD/datasets/dataset_mappers/__init__.py +14 -0
- OpenSeeD/datasets/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py +191 -0
- OpenSeeD/datasets/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py +166 -0
- OpenSeeD/datasets/dataset_mappers/imagenet_dataset_mapper.py +95 -0
- OpenSeeD/datasets/dataset_mappers/lvis_dataset_mapper.py +170 -0
- OpenSeeD/datasets/dataset_mappers/mask_former_instance_dataset_mapper.py +184 -0
- OpenSeeD/datasets/dataset_mappers/mask_former_panoptic_dataset_mapper.py +168 -0
.gitattributes
CHANGED
@@ -584,3 +584,10 @@ Groma/mmcv/docs/en/_static/flow_warp.png filter=lfs diff=lfs merge=lfs -text
 Groma/mmcv/docs/en/_static/flow_raw_images.png filter=lfs diff=lfs merge=lfs -text
 Groma/mmcv/docs/en/_static/zhihu_qrcode.jpg filter=lfs diff=lfs merge=lfs -text
 Groma/mmcv/docs/en/_static/community/3.png filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/figs/results1.jpg filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/figs/framework.jpg filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/figs/cover.jpg filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/figs/results2.jpg filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/figs/intro.jpg filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/images/animals.png filter=lfs diff=lfs merge=lfs -text
+OpenSeeD/images/street.jpg filter=lfs diff=lfs merge=lfs -text
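Each added line routes the matching path through Git LFS instead of plain Git storage. The same attribute syntax also accepts glob patterns, so a directory-wide rule (a sketch, not part of this commit) could cover these files in two lines:

    OpenSeeD/figs/*.jpg filter=lfs diff=lfs merge=lfs -text
    OpenSeeD/images/* filter=lfs diff=lfs merge=lfs -text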
Groma/mmdet/models/backbones/__pycache__/csp_darknet.cpython-39.pyc
ADDED (binary file, 9.09 kB)

Groma/mmdet/models/backbones/__pycache__/darknet.cpython-39.pyc
ADDED (binary file, 7.27 kB)

Groma/mmdet/models/backbones/__pycache__/detectors_resnet.cpython-39.pyc
ADDED (binary file, 9.53 kB)

Groma/mmdet/models/backbones/__pycache__/detectors_resnext.cpython-39.pyc
ADDED (binary file, 2.94 kB)

Groma/mmdet/models/backbones/__pycache__/hourglass.cpython-39.pyc
ADDED (binary file, 6.32 kB)

Groma/mmdet/models/backbones/__pycache__/hrnet.cpython-39.pyc
ADDED (binary file, 13.5 kB)

Groma/mmdet/models/backbones/__pycache__/mobilenet_v2.cpython-39.pyc
ADDED (binary file, 5.84 kB)

Groma/mmdet/models/backbones/__pycache__/regnet.cpython-39.pyc
ADDED (binary file, 11.1 kB)

Groma/mmdet/models/backbones/__pycache__/res2net.cpython-39.pyc
ADDED (binary file, 8.79 kB)

Groma/mmdet/models/backbones/__pycache__/resnest.cpython-39.pyc
ADDED (binary file, 8.9 kB)

Groma/mmdet/models/backbones/__pycache__/resnet.cpython-39.pyc
ADDED (binary file, 17.4 kB)

Groma/mmdet/models/backbones/__pycache__/resnext.cpython-39.pyc
ADDED (binary file, 4.72 kB)

Groma/mmdet/models/backbones/__pycache__/ssd_vgg.cpython-39.pyc
ADDED (binary file, 4.36 kB)

Groma/mmdet/models/backbones/__pycache__/swin.cpython-39.pyc
ADDED (binary file, 22.4 kB)

Groma/mmdet/models/backbones/__pycache__/trident_resnet.cpython-39.pyc
ADDED (binary file, 9.44 kB)

Groma/mmdet/models/losses/__pycache__/__init__.cpython-39.pyc
ADDED (binary file, 1.59 kB)

Groma/mmdet/models/losses/__pycache__/accuracy.cpython-39.pyc
ADDED (binary file, 3.22 kB)

Groma/mmdet/models/losses/__pycache__/ae_loss.cpython-39.pyc
ADDED (binary file, 3.6 kB)

Groma/mmdet/models/losses/__pycache__/balanced_l1_loss.cpython-39.pyc
ADDED (binary file, 4.1 kB)

Groma/mmdet/models/losses/__pycache__/cross_entropy_loss.cpython-39.pyc
ADDED (binary file, 7.61 kB)

Groma/mmdet/models/losses/__pycache__/dice_loss.cpython-39.pyc
ADDED (binary file, 4.86 kB)

Groma/mmdet/models/losses/__pycache__/focal_loss.cpython-39.pyc
ADDED (binary file, 7.34 kB)

Groma/mmdet/models/losses/__pycache__/gaussian_focal_loss.cpython-39.pyc
ADDED (binary file, 3.33 kB)

Groma/mmdet/models/losses/__pycache__/gfocal_loss.cpython-39.pyc
ADDED (binary file, 8.41 kB)

Groma/mmdet/models/losses/__pycache__/ghm_loss.cpython-39.pyc
ADDED (binary file, 6.33 kB)

Groma/mmdet/models/losses/__pycache__/iou_loss.cpython-39.pyc
ADDED (binary file, 12.5 kB)

Groma/mmdet/models/losses/__pycache__/kd_loss.cpython-39.pyc
ADDED (binary file, 2.91 kB)

Groma/mmdet/models/losses/__pycache__/mse_loss.cpython-39.pyc
ADDED (binary file, 2.13 kB)

Groma/mmdet/models/losses/__pycache__/pisa_loss.cpython-39.pyc
ADDED (binary file, 4.42 kB)

Groma/mmdet/models/losses/__pycache__/seesaw_loss.cpython-39.pyc
ADDED (binary file, 7.76 kB)

Groma/mmdet/models/losses/__pycache__/smooth_l1_loss.cpython-39.pyc
ADDED (binary file, 3.95 kB)

Groma/mmdet/models/losses/__pycache__/utils.cpython-39.pyc
ADDED (binary file, 2.76 kB)

Groma/mmdet/models/losses/__pycache__/varifocal_loss.cpython-39.pyc
ADDED (binary file, 4.77 kB)

Groma/mmdet/utils/__pycache__/__init__.cpython-39.pyc
ADDED (binary file, 358 Bytes)

Groma/mmdet/utils/__pycache__/collect_env.cpython-39.pyc
ADDED (binary file, 589 Bytes)

Groma/mmdet/utils/__pycache__/contextmanagers.cpython-39.pyc
ADDED (binary file, 3.55 kB)

Groma/mmdet/utils/__pycache__/logger.cpython-39.pyc
ADDED (binary file, 649 Bytes)

Groma/mmdet/utils/__pycache__/misc.cpython-39.pyc
ADDED (binary file, 1.17 kB)

Groma/mmdet/utils/__pycache__/setup_env.cpython-39.pyc
ADDED (binary file, 1.49 kB)

Groma/mmdet/utils/__pycache__/util_mixins.cpython-39.pyc
ADDED (binary file, 3.75 kB)
OpenSeeD/datasets/__init__.py
ADDED (new file, 2 lines)

from . import registration
from .build import *
OpenSeeD/datasets/build.py
ADDED (new file, 638 lines in the commit)

# Copyright (c) Facebook, Inc. and its affiliates.
import os
import itertools
import logging
import copy
from typing import Any, Callable, Dict, List, Optional, Set, Union

import torch
import torch.utils.data as torchdata

import detectron2.utils.comm as comm
from detectron2.data.build import (
    build_batch_data_loader,
    load_proposals_into_dataset,
    trivial_batch_collator,
)
from detectron2.data import MetadataCatalog
from detectron2.data.catalog import DatasetCatalog
from detectron2.data.common import DatasetFromList, MapDataset
from detectron2.data.dataset_mapper import DatasetMapper
from detectron2.data.samplers import InferenceSampler, TrainingSampler
from detectron2.evaluation import (
    CityscapesInstanceEvaluator,
    CityscapesSemSegEvaluator,
    COCOEvaluator,
    DatasetEvaluators,
    LVISEvaluator,
    verify_results,
)
from detectron2.utils.comm import get_world_size, is_main_process
from fvcore.common.config import CfgNode
from omegaconf import DictConfig, OmegaConf

from .dataset_mappers import (
    COCOInstanceNewBaselineDatasetMapper,
    COCOPanopticNewBaselineDatasetMapper,
    MaskFormerInstanceDatasetMapper,
    MaskFormerPanopticDatasetMapper,
    MaskFormerSemanticDatasetMapper,
    ImageNetDatasetMapper,
    VLPreDatasetMapper,
    SunRGBDSegDatasetMapper,
    ScanNetSegDatasetMapper,
    BDDSemDatasetMapper,
    ScanNetPanoDatasetMapper,
    RefCOCODatasetMapper,
    O365InstanceNewBaselineDatasetMapper,
)
from .evaluation import (
    InstanceSegEvaluator,
    SemSegEvaluator,
    COCOPanopticEvaluator,
)
from openseed.utils import configurable


class JointLoader(torchdata.IterableDataset):
    """Draws one batch from each wrapped loader per step, keyed by the dataset-name prefix."""

    def __init__(self, loaders, key_dataset):
        dataset_names = []
        for key, loader in loaders.items():
            name = "{}".format(key.split('_')[0])
            setattr(self, name, loader)
            dataset_names += [name]
        self.dataset_names = dataset_names
        self.key_dataset = key_dataset

    def __iter__(self):
        for batch in zip(*[getattr(self, name) for name in self.dataset_names]):
            yield {key: batch[i] for i, key in enumerate(self.dataset_names)}

    def __len__(self):
        return len(getattr(self, self.key_dataset))


def filter_images_with_only_crowd_annotations(dataset_dicts, dataset_names):
    """
    Filter out images with no annotations or with only crowd annotations
    (i.e., images without non-crowd annotations).
    A common training-time preprocessing step on the COCO dataset.

    Args:
        dataset_dicts (list[dict]): annotations in Detectron2 Dataset format.

    Returns:
        list[dict]: the same format, but filtered.
    """
    num_before = len(dataset_dicts)

    def valid(anns):
        for ann in anns:
            if isinstance(ann, list):
                for instance in ann:
                    if instance.get("iscrowd", 0) == 0:
                        return True
            else:
                if ann.get("iscrowd", 0) == 0:
                    return True
        return False

    dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])]
    num_after = len(dataset_dicts)
    logger = logging.getLogger(__name__)
    logger.info(
        "Removed {} images with no usable annotations. {} images left.".format(
            num_before - num_after, num_after
        )
    )
    return dataset_dicts


def get_detection_dataset_dicts(
    dataset_names, filter_empty=True, proposal_files=None
):
    """
    Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation.

    Args:
        dataset_names (str or list[str]): a dataset name or a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.

    Returns:
        list[dict]: a list of dicts following the standard dataset dict format.
    """
    if isinstance(dataset_names, str):
        dataset_names = [dataset_names]
    assert len(dataset_names)

    dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files)
        ]

    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))

    has_instances = "annotations" in dataset_dicts[0]
    if filter_empty and has_instances:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts, dataset_names)

    assert len(dataset_dicts), "No valid data found in {}.".format(",".join(dataset_names))
    return dataset_dicts


def _test_loader_from_config(cfg, dataset_name, mapper=None):
    """
    Uses the given `dataset_name` argument (instead of the names in cfg), because the
    standard practice is to evaluate each test set individually (not combining them).
    """
    if isinstance(dataset_name, str):
        dataset_name = [dataset_name]

    dataset = get_detection_dataset_dicts(
        dataset_name,
        filter_empty=False,
        proposal_files=None,
    )
    if mapper is None:
        if isinstance(cfg, DictConfig):
            cfg = OmegaConf.to_container(copy.deepcopy(cfg))
        mapper_cfg = CfgNode({'INPUT': cfg['INPUT'], 'MODEL': cfg['MODEL'], 'DATASETS': cfg['DATASETS']})
        mapper = DatasetMapper(mapper_cfg, False)
    assert cfg['TEST']['BATCH_SIZE_TOTAL'] % get_world_size() == 0, \
        "Total evaluation batch size is not divisible by the number of GPUs"
    batch_size = cfg['TEST']['BATCH_SIZE_TOTAL'] // get_world_size()

    return {
        "dataset": dataset,
        "mapper": mapper,
        "num_workers": cfg['DATALOADER']['NUM_WORKERS'],
        "sampler": InferenceSampler(len(dataset)),
        "batch_size": batch_size,
    }


@configurable(from_config=_test_loader_from_config)
def build_detection_test_loader(
    dataset: Union[List[Any], torchdata.Dataset],
    *,
    mapper: Callable[[Dict[str, Any]], Any],
    sampler: Optional[torchdata.Sampler] = None,
    batch_size: int = 1,
    num_workers: int = 0,
    collate_fn: Optional[Callable[[List[Any]], Any]] = None,
) -> torchdata.DataLoader:
    """
    Similar to `build_detection_train_loader`, with default batch size = 1,
    and sampler = :class:`InferenceSampler`. This sampler coordinates all workers
    to produce the exact set of all samples.

    Args:
        dataset: a list of dataset dicts,
            or a pytorch dataset (either map-style or iterable). They can be obtained
            by using :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper: a callable which takes a sample (dict) from the dataset
            and returns the format to be consumed by the model.
            When using cfg, the default choice is ``DatasetMapper(cfg, is_train=False)``.
        sampler: a sampler that produces
            indices to be applied on ``dataset``. Defaults to :class:`InferenceSampler`,
            which splits the dataset across all workers. Sampler must be None
            if `dataset` is iterable.
        batch_size: the batch size of the data loader to be created.
            Defaults to 1 image per worker since this is the standard when reporting
            inference time in papers.
        num_workers: number of parallel data loading workers
        collate_fn: same as the argument of `torch.utils.data.DataLoader`.
            Defaults to no collation, returning a list of data.

    Returns:
        DataLoader: a torch DataLoader that loads the given detection
        dataset, with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if isinstance(dataset, torchdata.IterableDataset):
        assert sampler is None, "sampler must be None if dataset is IterableDataset"
    else:
        if sampler is None:
            sampler = InferenceSampler(len(dataset))
    return torchdata.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        drop_last=False,
        num_workers=num_workers,
        collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
    )


def _train_loader_from_config(cfg, dataset_name, mapper, *, dataset=None, sampler=None):
    cfg_datasets = cfg['DATASETS']
    cfg_dataloader = cfg['DATALOADER']

    if dataset is None:
        dataset = get_detection_dataset_dicts(
            dataset_name,
            filter_empty=cfg_dataloader['FILTER_EMPTY_ANNOTATIONS'],
            proposal_files=cfg_datasets['PROPOSAL_FILES_TRAIN'] if cfg_dataloader['LOAD_PROPOSALS'] else None,
        )

    if mapper is None:
        mapper = DatasetMapper(cfg, True)

    if sampler is None:
        sampler_name = cfg_dataloader['SAMPLER_TRAIN']
        logger = logging.getLogger(__name__)
        logger.info("Using training sampler {}".format(sampler_name))
        sampler = TrainingSampler(len(dataset))

    return {
        "dataset": dataset,
        "sampler": sampler,
        "mapper": mapper,
        "total_batch_size": cfg['TRAIN']['BATCH_SIZE_TOTAL'],
        "aspect_ratio_grouping": cfg_dataloader['ASPECT_RATIO_GROUPING'],
        "num_workers": cfg_dataloader['NUM_WORKERS'],
    }


@configurable(from_config=_train_loader_from_config)
def build_detection_train_loader(
    dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
):
    """
    Build a dataloader for object detection with some default features.
    This interface is experimental.

    Args:
        dataset (list or torch.utils.data.Dataset): a list of dataset dicts,
            or a map-style pytorch dataset. They can be obtained by using
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`.
        mapper (callable): a callable which takes a sample (dict) from the dataset and
            returns the format to be consumed by the model.
            When using cfg, the default choice is ``DatasetMapper(cfg, is_train=True)``.
        sampler (torch.utils.data.sampler.Sampler or None): a sampler that
            produces indices to be applied on ``dataset``.
            Defaults to :class:`TrainingSampler`, which coordinates a random shuffle
            sequence across all workers.
        total_batch_size (int): total batch size across all workers. Batching
            simply puts data into a list.
        aspect_ratio_grouping (bool): whether to group images with similar
            aspect ratios for efficiency. When enabled, it requires each
            element in the dataset to be a dict with keys "width" and "height".
        num_workers (int): number of parallel data loading workers

    Returns:
        torch.utils.data.DataLoader: a dataloader. Each output from it is a
        ``list[mapped_element]`` of length ``total_batch_size / num_GPUs``,
        where ``mapped_element`` is produced by the ``mapper``.
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    if sampler is None:
        sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        aspect_ratio_grouping=aspect_ratio_grouping,
        num_workers=num_workers,
    )


def get_config_from_name(cfg, dataset_name):
    # adjust the config according to the dataset
    # (note 'refcoco' is checked before 'coco' because the former contains the latter)
    if 'refcoco' in dataset_name:
        cfg.update(cfg['REF'])
        return cfg
    elif 'coco' in dataset_name:
        if 'COCO' in cfg.keys():
            cfg.update(cfg['COCO'])
        return cfg
    elif 'ade' in dataset_name:
        if 'ADE20K' in cfg.keys():
            cfg.update(cfg['ADE20K'])
        return cfg
    elif 'imagenet' in dataset_name:
        if 'IMAGENET' in cfg.keys():
            cfg.update(cfg['IMAGENET'])
        return cfg
    elif 'vlp' in dataset_name:
        cfg.update(cfg['VLP'])
        return cfg
    elif 'sun' in dataset_name:
        cfg.update(cfg['SUN'])
        return cfg
    elif 'object365' in dataset_name:
        cfg.update(cfg['OBJECT365'])
        return cfg
    elif 'scan' in dataset_name:
        cfg.update(cfg['SCAN'])
        return cfg
    elif 'cityscape' in dataset_name:
        cfg.update(cfg['CITY'])
        return cfg
    elif 'bdd' in dataset_name:
        cfg.update(cfg['BDD'])
        return cfg
    else:
        raise ValueError("dataset {} not supported.".format(dataset_name))


def build_eval_dataloader(cfg):
    dataloaders = []
    cfg = copy.deepcopy(cfg)
    for dataset_name in cfg['DATASETS']['TEST']:
        cfg = get_config_from_name(cfg, dataset_name)
        # adjust the mapper according to the dataset
        if dataset_name == 'imagenet_val':
            mapper = ImageNetDatasetMapper(cfg, False)
        elif dataset_name == 'bdd10k_val_sem_seg':
            mapper = BDDSemDatasetMapper(cfg, False)
        elif dataset_name in ["vlp_val", "vlp_captioning_val", "vlp_val2017", "vlp_captioning_val2017"]:
            mapper = VLPreDatasetMapper(cfg, False, dataset_name)
        elif dataset_name in ["scannet_21_val_seg", "scannet_38_val_seg", "scannet_41_val_seg"]:
            mapper = ScanNetSegDatasetMapper(cfg, False)
        elif dataset_name in ["scannet_21_panoptic_val", 'bdd10k_40_panoptic_val']:
            mapper = ScanNetPanoDatasetMapper(cfg, False)
        elif 'sun' in dataset_name:
            mapper = SunRGBDSegDatasetMapper(cfg, False)
        elif 'refcoco' in dataset_name:
            mapper = RefCOCODatasetMapper(cfg, False)
        else:
            mapper = None
        dataloaders += [build_detection_test_loader(cfg, dataset_name, mapper=mapper)]
    return dataloaders


def build_train_dataloader(cfg):
    dataset_names = cfg['DATASETS']['TRAIN']

    loaders = {}
    cfg = copy.deepcopy(cfg)
    for dataset_name in dataset_names:
        cfg = get_config_from_name(cfg, dataset_name)
        mapper_name = cfg['INPUT']['DATASET_MAPPER_NAME']
        # Semantic segmentation dataset mapper
        if mapper_name == "mask_former_semantic":
            mapper = MaskFormerSemanticDatasetMapper(cfg, True)
            loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        # Panoptic segmentation dataset mapper
        elif mapper_name == "mask_former_panoptic":  # TODO: hack for ADE training; should key on the ADE name
            mapper = MaskFormerPanopticDatasetMapper(cfg, True)
            loaders['ade'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        # Instance segmentation dataset mapper
        elif mapper_name == "mask_former_instance":
            mapper = MaskFormerInstanceDatasetMapper(cfg, True)
            loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        # COCO instance segmentation, LSJ new baseline
        elif mapper_name == "coco_instance_lsj":
            mapper = COCOInstanceNewBaselineDatasetMapper(cfg, True)
            loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        # COCO panoptic segmentation, LSJ new baseline
        elif mapper_name == "coco_panoptic_lsj":
            mapper = COCOPanopticNewBaselineDatasetMapper(cfg, True)
            loaders['coco'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        elif mapper_name == "object365":
            mapper = O365InstanceNewBaselineDatasetMapper(cfg, True)  # use the LSJ instance mapper for O365
            loaders['o365'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        elif mapper_name == "vlpretrain":
            mapper = VLPreDatasetMapper(cfg, True, dataset_name)
            loaders['vlp'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        elif mapper_name == "refcoco":
            mapper = RefCOCODatasetMapper(cfg, True)
            loaders['ref'] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)
        else:
            mapper = None
            loaders[dataset_name] = build_detection_train_loader(cfg, dataset_name=dataset_name, mapper=mapper)

    if len(loaders) == 1 and not cfg['LOADER'].get('JOINT', False):
        for k, v in loaders.items():
            print("number of iterations per epoch: ", v, len(loaders[k]))
        return list(loaders.values())[0]
    else:
        return JointLoader(loaders, key_dataset=cfg['LOADER'].get('KEY_DATASET', 'coco'))


def build_evaluator(cfg, dataset_name, output_folder=None):
    """
    Create evaluator(s) for a given dataset.
    This uses the special metadata "evaluator_type" associated with each
    builtin dataset. For your own dataset, you can simply create an
    evaluator manually in your script and do not have to worry about the
    hacky if-else logic here.
    """
    if output_folder is None:
        output_folder = os.path.join(cfg["OUTPUT_DIR"], "inference")
    evaluator_list = []
    evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type

    # semantic segmentation
    if evaluator_type in ["sem_seg", "ade20k_panoptic_seg"]:
        evaluator_list.append(
            SemSegEvaluator(
                dataset_name,
                distributed=True,
                output_dir=output_folder,
            )
        )
    # instance segmentation
    if evaluator_type == "coco":
        evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))

    cfg_model_decoder_test = cfg["MODEL"]["DECODER"]["TEST"]
    # panoptic segmentation
    if evaluator_type in [
        "coco_panoptic_seg",
        "ade20k_panoptic_seg",
        "cityscapes_panoptic_seg",
        "mapillary_vistas_panoptic_seg",
        "scannet_panoptic_seg",
        "bdd_panoptic_pano",
    ]:
        if cfg_model_decoder_test["PANOPTIC_ON"]:
            evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
    # COCO
    if (evaluator_type == "coco_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]) or evaluator_type == "object365_od":
        evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
    if (evaluator_type == "coco_panoptic_seg" and cfg_model_decoder_test["SEMANTIC_ON"]) or evaluator_type == "coco_sem_seg":
        evaluator_list.append(SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder))
    # Mapillary Vistas
    if evaluator_type == "mapillary_vistas_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]:
        evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
    if evaluator_type == "mapillary_vistas_panoptic_seg" and cfg_model_decoder_test["SEMANTIC_ON"]:
        evaluator_list.append(SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder))
    # Cityscapes
    if evaluator_type == "cityscapes_instance":
        assert (
            torch.cuda.device_count() > comm.get_rank()
        ), "CityscapesEvaluator currently does not work with multiple machines."
        return CityscapesInstanceEvaluator(dataset_name)
    if evaluator_type == "cityscapes_sem_seg":
        assert (
            torch.cuda.device_count() > comm.get_rank()
        ), "CityscapesEvaluator currently does not work with multiple machines."
        return CityscapesSemSegEvaluator(dataset_name)
    if evaluator_type == "cityscapes_panoptic_seg":
        if cfg_model_decoder_test["SEMANTIC_ON"]:
            assert (
                torch.cuda.device_count() > comm.get_rank()
            ), "CityscapesEvaluator currently does not work with multiple machines."
            evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
        if cfg_model_decoder_test["INSTANCE_ON"]:
            assert (
                torch.cuda.device_count() > comm.get_rank()
            ), "CityscapesEvaluator currently does not work with multiple machines."
            evaluator_list.append(CityscapesInstanceEvaluator(dataset_name))
    # ADE20K
    if evaluator_type == "ade20k_panoptic_seg" and cfg_model_decoder_test["INSTANCE_ON"]:
        evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
    # SEGINW
    if evaluator_type == "seginw" and cfg_model_decoder_test["INSTANCE_ON"]:
        evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
    # LVIS
    if evaluator_type == "lvis":
        return LVISEvaluator(dataset_name, output_dir=output_folder)
    # NOTE: ClassificationEvaluator, RetrievalEvaluator, CaptioningEvaluator and
    # GroundingEvaluator are referenced below but not imported above; these
    # branches assume the surrounding project provides them.
    # Classification
    if evaluator_type == "classification":
        evaluator_list.append(ClassificationEvaluator(dataset_name, output_folder))
    # Retrieval
    if evaluator_type == "retrieval":
        evaluator_list.append(RetrievalEvaluator(dataset_name, output_folder, cfg['MODEL']['DECODER']['RETRIEVAL']['ENSEMBLE']))
    # Captioning
    if evaluator_type == "captioning":
        evaluator_list.append(CaptioningEvaluator(dataset_name, output_folder, MetadataCatalog.get(dataset_name).gt_json))
    # Grounding
    if evaluator_type in ["grounding_refcoco", "grounding_phrasecut"]:
        evaluator_list.append(GroundingEvaluator(dataset_name))

    if len(evaluator_list) == 0:
        raise NotImplementedError(
            "no Evaluator for the dataset {} with the type {}".format(
                dataset_name, evaluator_type
            )
        )
    elif len(evaluator_list) == 1:
        return evaluator_list[0]

    return DatasetEvaluators(evaluator_list)


def build_optimizer(cls, cfg, model):
    cfg_solver = cfg['SOLVER']
    weight_decay_norm = cfg_solver['WEIGHT_DECAY_NORM']
    weight_decay_embed = cfg_solver['WEIGHT_DECAY_EMBED']
    weight_decay_bias = cfg_solver.get('WEIGHT_DECAY_BIAS', 0.0)

    logger = logging.getLogger(__name__)

    defaults = {}
    defaults["lr"] = cfg_solver['BASE_LR']
    defaults["weight_decay"] = cfg_solver['WEIGHT_DECAY']

    norm_module_types = (
        torch.nn.BatchNorm1d,
        torch.nn.BatchNorm2d,
        torch.nn.BatchNorm3d,
        torch.nn.SyncBatchNorm,
        # NaiveSyncBatchNorm inherits from BatchNorm2d
        torch.nn.GroupNorm,
        torch.nn.InstanceNorm1d,
        torch.nn.InstanceNorm2d,
        torch.nn.InstanceNorm3d,
        torch.nn.LayerNorm,
        torch.nn.LocalResponseNorm,
    )

    lr_multiplier = cfg_solver['LR_MULTIPLIER']

    params: List[Dict[str, Any]] = []
    memo: Set[torch.nn.parameter.Parameter] = set()
    for module_name, module in model.named_modules():
        for module_param_name, value in module.named_parameters(recurse=False):
            if not value.requires_grad:
                continue
            # Avoid duplicating parameters
            if value in memo:
                continue
            memo.add(value)

            hyperparams = copy.copy(defaults)

            for key, lr_mul in lr_multiplier.items():
                if key in "{}.{}".format(module_name, module_param_name):
                    hyperparams["lr"] = hyperparams["lr"] * lr_mul
                    if is_main_process():
                        logger.info("Modified learning rate of {}: {}".format(
                            "{}.{}".format(module_name, module_param_name), lr_mul))

            if (
                "relative_position_bias_table" in module_param_name
                or "absolute_pos_embed" in module_param_name
            ):
                hyperparams["weight_decay"] = 0.0
            if isinstance(module, norm_module_types):
                hyperparams["weight_decay"] = weight_decay_norm
            if isinstance(module, torch.nn.Embedding):
                hyperparams["weight_decay"] = weight_decay_embed
            if "bias" in module_name:
                hyperparams["weight_decay"] = weight_decay_bias
            params.append({"params": [value], **hyperparams})

    def maybe_add_full_model_gradient_clipping(optim):
        # detectron2 doesn't have full-model gradient clipping yet
        clip_norm_val = cfg_solver['CLIP_GRADIENTS']['CLIP_VALUE']
        enable = (
            cfg_solver['CLIP_GRADIENTS']['ENABLED']
            and cfg_solver['CLIP_GRADIENTS']['CLIP_TYPE'] == "full_model"
            and clip_norm_val > 0.0
        )

        class FullModelGradientClippingOptimizer(optim):
            def step(self, closure=None):
                all_params = itertools.chain(*[x["params"] for x in self.param_groups])
                torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val)
                super().step(closure=closure)

        return FullModelGradientClippingOptimizer if enable else optim

    optimizer_type = cfg_solver['OPTIMIZER']
    if optimizer_type == "SGD":
        optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)(
            params, cfg_solver['BASE_LR'], momentum=cfg_solver['MOMENTUM']
        )
    elif optimizer_type == "ADAMW":
        optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)(
            params, cfg_solver['BASE_LR']
        )
    else:
        raise NotImplementedError(f"no optimizer type {optimizer_type}")
    return optimizer
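For orientation, here is a minimal usage sketch of the train-loader entry point above. It is not part of the commit: the dataset name and every config value are placeholders, it assumes the OpenSeeD repo root is on sys.path and 'coco_2017_train' is already registered in Detectron2's DatasetCatalog, and a real config must also carry whatever extra keys the selected mapper reads.

    # Sketch only: all values are placeholder assumptions, not project defaults.
    from datasets.build import build_train_dataloader

    cfg = {
        'DATASETS': {'TRAIN': ['coco_2017_train'], 'TEST': []},
        'DATALOADER': {'NUM_WORKERS': 4, 'FILTER_EMPTY_ANNOTATIONS': True,
                       'LOAD_PROPOSALS': False, 'ASPECT_RATIO_GROUPING': True,
                       'SAMPLER_TRAIN': 'TrainingSampler'},
        'TRAIN': {'BATCH_SIZE_TOTAL': 16},   # split evenly across all GPUs
        'LOADER': {'JOINT': False, 'KEY_DATASET': 'coco'},
        'INPUT': {'DATASET_MAPPER_NAME': 'coco_instance_lsj', 'IMAGE_SIZE': 1024,
                  'MIN_SCALE': 0.1, 'MAX_SCALE': 2.0,
                  'RANDOM_FLIP': 'horizontal', 'FORMAT': 'RGB'},
        'COCO': {},   # per-dataset override block read by get_config_from_name
    }

    train_loader = build_train_dataloader(cfg)  # one loader here; a JointLoader
                                                # when several datasets are joined
    for batch in train_loader:                  # batch: list[dict], one per image
        break

With several TRAIN datasets and LOADER.JOINT enabled, the same call instead returns a JointLoader, whose iterator yields one dict per step containing a batch from every wrapped loader, and whose length follows LOADER.KEY_DATASET.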
OpenSeeD/datasets/dataset_mappers/__init__.py
ADDED (new file, 14 lines)

# Copyright (c) Facebook, Inc. and its affiliates.
from .coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper
from .coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper
from .mask_former_instance_dataset_mapper import MaskFormerInstanceDatasetMapper
from .mask_former_panoptic_dataset_mapper import MaskFormerPanopticDatasetMapper
from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper
from .imagenet_dataset_mapper import ImageNetDatasetMapper
from .vlp_dataset_mapper import VLPreDatasetMapper
from .sunrgbd_dataset_mapper import SunRGBDSegDatasetMapper
from .scannet_dataset_mapper import ScanNetSegDatasetMapper
from .bdd_semseg_dataset_mapper import BDDSemDatasetMapper
from .scannet_pano_dataset_mapper import ScanNetPanoDatasetMapper
from .refcoco_dataset_mapper import RefCOCODatasetMapper
from .o365_instance_new_baseline_dataset_mapper import O365InstanceNewBaselineDatasetMapper
OpenSeeD/datasets/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py
ADDED (new file, 191 lines in the commit)

# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
import copy
import logging

import numpy as np
import torch

from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data.transforms import TransformGen
from detectron2.structures import BitMasks, Instances

from pycocotools import mask as coco_mask

from openseed.utils import configurable

__all__ = ["COCOInstanceNewBaselineDatasetMapper"]


def convert_coco_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks


def build_transform_gen(cfg, is_train):
    """
    Create a list of default :class:`Augmentation` from config.
    Currently it includes resizing and flipping.

    Returns:
        list[Augmentation]
    """
    assert is_train, "Only training augmentation is supported"
    cfg_input = cfg['INPUT']
    image_size = cfg_input['IMAGE_SIZE']
    min_scale = cfg_input['MIN_SCALE']
    max_scale = cfg_input['MAX_SCALE']

    augmentation = []

    if cfg_input['RANDOM_FLIP'] != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=cfg_input['RANDOM_FLIP'] == "horizontal",
                vertical=cfg_input['RANDOM_FLIP'] == "vertical",
            )
        )

    augmentation.extend([
        T.ResizeScale(
            min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
        ),
        T.FixedSizeCrop(crop_size=(image_size, image_size)),
    ])

    return augmentation


# This is specifically designed for the COCO dataset.
class COCOInstanceNewBaselineDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer.

    This dataset mapper applies the same transformations as DETR for COCO panoptic segmentation.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotations
    3. Finds and applies suitable cropping to the image and annotations
    4. Prepares the image and annotations as Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        *,
        tfm_gens,
        image_format,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            tfm_gens: data augmentation
            image_format: an image format supported by :func:`detection_utils.read_image`.
        """
        self.tfm_gens = tfm_gens
        logging.getLogger(__name__).info(
            "[COCOInstanceNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(str(self.tfm_gens))
        )

        self.img_format = image_format
        self.is_train = is_train

    @classmethod
    def from_config(cls, cfg, is_train=True):
        # Build augmentation
        tfm_gens = build_transform_gen(cfg, is_train)

        ret = {
            "is_train": is_train,
            "tfm_gens": tfm_gens,
            "image_format": cfg['INPUT']['FORMAT'],
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # Build a padding mask by feeding an all-ones "segmentation mask"
        # through the same transforms as the image.
        padding_mask = np.ones(image.shape[:2])

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        # the crop transformation has default padding value 0 for segmentation
        padding_mask = transforms.apply_segmentation(padding_mask)
        padding_mask = ~padding_mask.astype(bool)

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                # Always keep the segmentation mask; only keypoints are dropped.
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms, image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            # NOTE: does not support BitMask due to augmentation
            # Current BitMask cannot handle empty objects
            instances = utils.annotations_to_instances(annos, image_shape)
            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of the original bounding box and the cropping box.
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            # Need to filter empty instances first (due to augmentation)
            instances = utils.filter_empty_instances(instances)
            # Generate masks from polygons
            h, w = instances.image_size
            if hasattr(instances, 'gt_masks'):
                gt_masks = instances.gt_masks
                gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w)
                instances.gt_masks = gt_masks
            dataset_dict["instances"] = instances

        return dataset_dict
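A minimal sketch of this mapper in isolation, not part of the commit: the image path and annotation are placeholders, the config dict carries only the keys that build_transform_gen and from_config read, and the call pattern mirrors how build.py constructs the mapper.

    # Sketch only: '/path/to/image.jpg' and the annotation values are placeholders.
    from detectron2.structures import BoxMode
    from datasets.dataset_mappers import COCOInstanceNewBaselineDatasetMapper

    cfg = {'INPUT': {'IMAGE_SIZE': 1024, 'MIN_SCALE': 0.1, 'MAX_SCALE': 2.0,
                     'RANDOM_FLIP': 'horizontal', 'FORMAT': 'RGB'}}
    mapper = COCOInstanceNewBaselineDatasetMapper(cfg, True)  # same call as build.py

    record = {
        'file_name': '/path/to/image.jpg',   # placeholder
        'height': 480, 'width': 640,
        'annotations': [{
            'bbox': [10.0, 10.0, 100.0, 100.0], 'bbox_mode': BoxMode.XYWH_ABS,
            'segmentation': [[10.0, 10.0, 110.0, 10.0, 110.0, 110.0, 10.0, 110.0]],
            'category_id': 0, 'iscrowd': 0,
        }],
    }
    out = mapper(record)   # adds 'image', 'padding_mask' and 'instances' tensors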
OpenSeeD/datasets/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py
ADDED
@@ -0,0 +1,166 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py

import copy
import logging

import numpy as np
import torch

from detectron2.config import configurable
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data.transforms import TransformGen
from detectron2.structures import BitMasks, Boxes, Instances

__all__ = ["COCOPanopticNewBaselineDatasetMapper"]


def build_transform_gen(cfg, is_train):
    """
    Create a list of default :class:`Augmentation` from config.
    Now it includes resizing and flipping.
    Returns:
        list[Augmentation]
    """
    assert is_train, "Only support training augmentation"
    image_size = cfg.INPUT.IMAGE_SIZE
    min_scale = cfg.INPUT.MIN_SCALE
    max_scale = cfg.INPUT.MAX_SCALE

    augmentation = []

    if cfg.INPUT.RANDOM_FLIP != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal",
                vertical=cfg.INPUT.RANDOM_FLIP == "vertical",
            )
        )

    augmentation.extend([
        T.ResizeScale(
            min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
        ),
        T.FixedSizeCrop(crop_size=(image_size, image_size)),
    ])

    return augmentation


# This is specifically designed for the COCO dataset.
class COCOPanopticNewBaselineDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer.

    This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares image and annotation to Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        *,
        tfm_gens,
        image_format,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            tfm_gens: data augmentation
            image_format: an image format supported by :func:`detection_utils.read_image`.
        """
        self.tfm_gens = tfm_gens
        logging.getLogger(__name__).info(
            "[COCOPanopticNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(
                str(self.tfm_gens)
            )
        )

        self.img_format = image_format
        self.is_train = is_train

    @classmethod
    def from_config(cls, cfg, is_train=True):
        # Build augmentation
        tfm_gens = build_transform_gen(cfg, is_train)

        ret = {
            "is_train": is_train,
            "tfm_gens": tfm_gens,
            "image_format": cfg.INPUT.FORMAT,
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "pan_seg_file_name" in dataset_dict:
            pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
            segments_info = dataset_dict["segments_info"]

            # apply the same transformation to panoptic segmentation
            pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)

            from panopticapi.utils import rgb2id

            pan_seg_gt = rgb2id(pan_seg_gt)

            instances = Instances(image_shape)
            classes = []
            masks = []
            for segment_info in segments_info:
                class_id = segment_info["category_id"]
                if not segment_info["iscrowd"]:
                    classes.append(class_id)
                    masks.append(pan_seg_gt == segment_info["id"])

            classes = np.array(classes)
            instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
            if len(masks) == 0:
                # Some image does not have annotation (all ignored)
                instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
                instances.gt_boxes = Boxes(torch.zeros((0, 4)))
            else:
                masks = BitMasks(
                    torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
                )
                instances.gt_masks = masks.tensor
                instances.gt_boxes = masks.get_bounding_boxes()

            dataset_dict["instances"] = instances

        return dataset_dict
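
A hedged usage sketch for the mapper above, showing how it might be wired into a detectron2 training loader. The INPUT.IMAGE_SIZE / MIN_SCALE / MAX_SCALE keys are project-specific config additions assumed here (they are not in the base detectron2 config), and the dataset name is only an example:

from detectron2.config import get_cfg
from detectron2.data import build_detection_train_loader

cfg = get_cfg()
# Project-specific keys consumed by build_transform_gen (assumed values).
cfg.INPUT.IMAGE_SIZE = 1024
cfg.INPUT.MIN_SCALE = 0.1
cfg.INPUT.MAX_SCALE = 2.0
cfg.INPUT.RANDOM_FLIP = "horizontal"
cfg.DATASETS.TRAIN = ("coco_2017_train_panoptic",)

# @configurable lets the class be constructed directly from the cfg.
mapper = COCOPanopticNewBaselineDatasetMapper(cfg, is_train=True)
train_loader = build_detection_train_loader(cfg, mapper=mapper)
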
OpenSeeD/datasets/dataset_mappers/imagenet_dataset_mapper.py
ADDED
@@ -0,0 +1,95 @@
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Modified by Xueyan Zou (xueyan@cs.wisc.edu)
# --------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
from PIL import Image
# import logging

import cv2
import numpy as np

import torch
from torchvision import transforms

from openseed.utils import configurable

__all__ = ["ImageNetDatasetMapper"]


# This mapper is specifically designed for the ImageNet dataset.
class ImageNetDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Resizes and center-crops the image (at inference time)
    3. Prepares the image as a channels-first Tensor
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        size_train=None,
        size_test=None,
        size_crop=None,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            size_train: target size for training
            size_test: center-crop size used at inference
            size_crop: resize target applied before the center crop
        """
        self.is_train = is_train
        self.size_train = size_train
        self.size_test = size_test
        self.size_crop = size_crop

        t = []
        t.append(transforms.Resize(size_crop, interpolation=Image.BICUBIC))
        t.append(transforms.CenterCrop(size_test))
        self.transform = transforms.Compose(t)

    @classmethod
    def from_config(cls, cfg, is_train=True):
        ret = {
            "is_train": is_train,
            "size_train": cfg['INPUT']['SIZE_TRAIN'],
            "size_test": cfg['INPUT']['SIZE_TEST'],
            "size_crop": cfg['INPUT']['SIZE_CROP'],
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        file_name = dataset_dict['file_name']
        image = Image.open(file_name).convert('RGB')

        if not self.is_train:
            image = self.transform(image)
        image = torch.from_numpy(np.asarray(image).copy())
        image = image.permute(2, 0, 1)

        dataset_dict['image'] = image
        dataset_dict['height'] = image.shape[1]
        dataset_dict['width'] = image.shape[2]
        return dataset_dict
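
A hedged sketch of calling this mapper on a single sample at inference time. The file path is a placeholder, and openseed's @configurable is assumed to behave like detectron2's, i.e. plain keyword arguments bypass from_config:

mapper = ImageNetDatasetMapper(
    is_train=False,
    size_train=224,
    size_test=224,   # center-crop size
    size_crop=256,   # short side is resized to this before the crop
)
sample = {"file_name": "path/to/image.jpg"}
out = mapper(sample)
print(out["image"].shape)  # torch.Size([3, 224, 224]), uint8, channels-first
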
OpenSeeD/datasets/dataset_mappers/lvis_dataset_mapper.py
ADDED
@@ -0,0 +1,170 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/facebookresearch/detr/blob/master/d2/detr/dataset_mapper.py
import copy
import random

import scipy.io
import numpy as np
import torch
from PIL import Image

from torchvision import transforms

from pycocotools import mask
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.data import MetadataCatalog

from ...Networks.Mask2Former.utils import configurable

__all__ = ["LVISDatasetMapper"]

def build_transform_gen(cfg, is_train):
    """
    Create a list of default :class:`Augmentation` from config.
    Now it includes resizing and flipping.
    Returns:
        list[Augmentation]
    """
    assert is_train, "Only support training augmentation"
    cfg_input = cfg['INPUT']
    image_size = cfg_input['IMAGE_SIZE']
    min_scale = cfg_input['MIN_SCALE']
    max_scale = cfg_input['MAX_SCALE']

    augmentation = []

    if cfg_input['RANDOM_FLIP'] != "none":
        augmentation.append(
            T.RandomFlip(
                horizontal=cfg_input['RANDOM_FLIP'] == "horizontal",
                vertical=cfg_input['RANDOM_FLIP'] == "vertical",
            )
        )

    augmentation.extend([
        T.ResizeScale(
            min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size
        ),
        T.FixedSizeCrop(crop_size=(image_size, image_size)),
    ])

    return augmentation


# This is specifically designed for the LVIS dataset.
class LVISDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer.

    This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares image and annotation to Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        tfm_gens=None,
        image_format=None,
        min_size_test=None,
        max_size_test=None,
        mean=None,
        std=None,
        max_len=None,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            tfm_gens: data augmentation
            image_format: an image format supported by :func:`detection_utils.read_image`.
        """
        self.tfm_gens = tfm_gens
        self.img_format = image_format
        self.is_train = is_train
        self.min_size_test = min_size_test
        self.max_size_test = max_size_test
        self.pixel_mean = torch.tensor(mean)[:, None, None]
        self.pixel_std = torch.tensor(std)[:, None, None]
        self.max_grounding_num = max_len

        t = []
        t.append(transforms.Resize(self.min_size_test, interpolation=Image.BICUBIC))
        self.transform = transforms.Compose(t)
        self.categories = torch.load(MetadataCatalog.get('logistic').get('cat_root'))

    @classmethod
    def from_config(cls, cfg, is_train=True):
        # Build augmentation
        if is_train:
            tfm_gens = build_transform_gen(cfg, is_train)
        else:
            tfm_gens = None

        ret = {
            "is_train": is_train,
            "tfm_gens": tfm_gens,
            "image_format": cfg['INPUT']['FORMAT'],
            "min_size_test": cfg['INPUT']['MIN_SIZE_TEST'],
            "max_size_test": cfg['INPUT']['MAX_SIZE_TEST'],
            "mean": cfg['INPUT']['PIXEL_MEAN'],
            "std": cfg['INPUT']['PIXEL_STD'],
            "max_len": cfg['MODEL']['DECODER']['GROUNDING']['MAX_LEN'],
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        file_name = dataset_dict['file_name']
        assert self.is_train, "Only support training."

        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        assert len(dataset_dict['instance']) > 0
        masks_grd = []
        texts_grd = []
        boxes_grd = []  # note: never filled below, so the resulting tensor stays empty
        hash_grd = []
        for inst, label in zip(dataset_dict['instance'], dataset_dict['labels']):
            rle = mask.frPyObjects(inst, dataset_dict['height'], dataset_dict['width'])
            m = mask.decode(rle)
            # sometimes there are multiple binary maps (corresponding to multiple segs)
            m = np.sum(m, axis=2)
            m = m.astype(np.uint8)  # convert to np.uint8
            m = transforms.apply_segmentation(m[:, :, None])[:, :, 0]
            masks_grd += [m]
            label_names = self.categories[label]
            rand_id = random.randint(0, len(label_names) - 1)
            texts_grd.append(label_names[rand_id].lower())
            hash_grd.append(hash(label_names[rand_id].lower()))

        indices = torch.randperm(len(hash_grd))[:self.max_grounding_num]
        masks_grd = torch.from_numpy(np.stack(masks_grd))[indices]
        boxes_grd = torch.tensor(boxes_grd)
        texts_grd = np.array(texts_grd)[indices.numpy()].tolist()
        hash_grd = np.array(hash_grd)[indices.numpy()].tolist()
        groundings = {'masks': masks_grd, 'texts': texts_grd, 'hash': hash_grd, 'mode': 'text'}
        dataset_dict["groundings"] = groundings
        return dataset_dict
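
A hedged mini-example of the polygon-to-binary-mask step used in the loop above; the triangle coordinates and the 4x4 canvas are made up purely for illustration:

import numpy as np
from pycocotools import mask

h, w = 4, 4
inst = [[0.0, 0.0, 4.0, 0.0, 4.0, 4.0]]  # one polygon, as a flat x,y list
rle = mask.frPyObjects(inst, h, w)       # one RLE per polygon component
m = mask.decode(rle)                     # (4, 4, 1) uint8
m = np.sum(m, axis=2).astype(np.uint8)   # union of components, as in the mapper
print(m)                                 # binary mask of the triangle
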
OpenSeeD/datasets/dataset_mappers/mask_former_instance_dataset_mapper.py
ADDED
@@ -0,0 +1,184 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging

import numpy as np
import pycocotools.mask as mask_util
import torch
from torch.nn import functional as F

from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.projects.point_rend import ColorAugSSDTransform
from detectron2.structures import BitMasks, Instances, polygons_to_bitmask

from openseed.utils import configurable

__all__ = ["MaskFormerInstanceDatasetMapper"]


class MaskFormerInstanceDatasetMapper:
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer for instance segmentation.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares image and annotation to Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        *,
        augmentations,
        image_format,
        size_divisibility,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            size_divisibility: pad image size to be divisible by this value
        """
        self.is_train = is_train
        self.tfm_gens = augmentations
        self.img_format = image_format
        self.size_divisibility = size_divisibility

        logger = logging.getLogger(__name__)
        mode = "training" if is_train else "inference"
        logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}")

    @classmethod
    def from_config(cls, cfg, is_train=True):
        # Build augmentation
        cfg_input = cfg['INPUT']
        augs = [
            T.ResizeShortestEdge(
                cfg_input['MIN_SIZE_TRAIN'],
                cfg_input['MAX_SIZE_TRAIN'],
                cfg_input['MIN_SIZE_TRAIN_SAMPLING'],
            )
        ]

        cfg_input_crop = cfg_input['CROP']
        if cfg_input_crop['ENABLED']:
            augs.append(
                T.RandomCrop(
                    cfg_input_crop['TYPE'],
                    cfg_input_crop['SIZE'],
                )
            )
        if cfg_input['COLOR_AUG_SSD']:
            augs.append(ColorAugSSDTransform(img_format=cfg_input['FORMAT']))
        augs.append(T.RandomFlip())

        ret = {
            "is_train": is_train,
            "augmentations": augs,
            "image_format": cfg_input['FORMAT'],
            "size_divisibility": cfg_input['SIZE_DIVISIBILITY'],
        }
        return ret

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        assert self.is_train, "MaskFormerInstanceDatasetMapper should only be used for training!"

        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        aug_input = T.AugInput(image)
        aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input)
        image = aug_input.image

        # transform instance masks
        assert "annotations" in dataset_dict
        for anno in dataset_dict["annotations"]:
            anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(obj, transforms, image.shape[:2])
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]

        if len(annos):
            assert "segmentation" in annos[0]
        segms = [obj["segmentation"] for obj in annos]
        masks = []
        for segm in segms:
            if isinstance(segm, list):
                # polygon
                masks.append(polygons_to_bitmask(segm, *image.shape[:2]))
            elif isinstance(segm, dict):
                # COCO RLE
                masks.append(mask_util.decode(segm))
            elif isinstance(segm, np.ndarray):
                assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
                    segm.ndim
                )
                # mask array
                masks.append(segm)
            else:
                raise ValueError(
                    "Cannot convert segmentation of type '{}' to BitMasks!"
                    "Supported types are: polygons as list[list[float] or ndarray],"
                    " COCO-style RLE as a dict, or a binary segmentation mask "
                    " in a 2D numpy array of shape HxW.".format(type(segm))
                )

        # Pad image and segmentation label here!
        image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks]

        classes = [int(obj["category_id"]) for obj in annos]
        classes = torch.tensor(classes, dtype=torch.int64)

        if self.size_divisibility > 0:
            image_size = (image.shape[-2], image.shape[-1])
            padding_size = [
                0,
                self.size_divisibility - image_size[1],
                0,
                self.size_divisibility - image_size[0],
            ]
            # pad image
            image = F.pad(image, padding_size, value=128).contiguous()
            # pad mask
            masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks]

        image_shape = (image.shape[-2], image.shape[-1])  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = image

        # Prepare per-category binary masks
        instances = Instances(image_shape)
        instances.gt_classes = classes
        if len(masks) == 0:
            # Some image does not have annotation (all ignored)
            instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1]))
        else:
            masks = BitMasks(torch.stack(masks))
            instances.gt_masks = masks.tensor

        dataset_dict["instances"] = instances

        return dataset_dict
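
One detail worth noting in the padding branch above: padding_size targets the fixed size size_divisibility itself, so it implicitly assumes the augmented image is no larger than that value. If true pad-to-nearest-multiple behavior were wanted instead, a hedged sketch (the helper name is ours, not the project's):

import torch
from torch.nn import functional as F

def pad_to_multiple(image: torch.Tensor, divisor: int) -> torch.Tensor:
    h, w = image.shape[-2], image.shape[-1]
    pad_h = (divisor - h % divisor) % divisor
    pad_w = (divisor - w % divisor) % divisor
    # F.pad takes (left, right, top, bottom) for the last two dimensions
    return F.pad(image, [0, pad_w, 0, pad_h], value=128)
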
OpenSeeD/datasets/dataset_mappers/mask_former_panoptic_dataset_mapper.py
ADDED
@@ -0,0 +1,168 @@
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import logging

import numpy as np
import torch
from torch.nn import functional as F

from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
from detectron2.structures import BitMasks, Instances

from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper
from openseed.utils import configurable


__all__ = ["MaskFormerPanopticDatasetMapper"]


class MaskFormerPanopticDatasetMapper(MaskFormerSemanticDatasetMapper):
    """
    A callable which takes a dataset dict in Detectron2 Dataset format,
    and maps it into a format used by MaskFormer for panoptic segmentation.

    The callable currently does the following:

    1. Reads the image from "file_name"
    2. Applies geometric transforms to the image and annotation
    3. Finds and applies suitable cropping to the image and annotation
    4. Prepares image and annotation to Tensors
    """

    @configurable
    def __init__(
        self,
        is_train=True,
        *,
        augmentations,
        image_format,
        ignore_label,
        size_divisibility,
    ):
        """
        NOTE: this interface is experimental.
        Args:
            is_train: for training or inference
            augmentations: a list of augmentations or deterministic transforms to apply
            image_format: an image format supported by :func:`detection_utils.read_image`.
            ignore_label: the label that is ignored during evaluation
            size_divisibility: pad image size to be divisible by this value
        """
        super().__init__(
            is_train,
            augmentations=augmentations,
            image_format=image_format,
            ignore_label=ignore_label,
            size_divisibility=size_divisibility,
        )

    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!"

        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # semantic segmentation
        if "sem_seg_file_name" in dataset_dict:
            # PyTorch transformation not implemented for uint16, so converting it to double first
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double")
        else:
            sem_seg_gt = None

        # panoptic segmentation
        if "pan_seg_file_name" in dataset_dict:
            pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")
            segments_info = dataset_dict["segments_info"]
        else:
            pan_seg_gt = None
            segments_info = None

        if pan_seg_gt is None:
            raise ValueError(
                "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format(
                    dataset_dict["file_name"]
                )
            )

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input)
        image = aug_input.image
        if sem_seg_gt is not None:
            sem_seg_gt = aug_input.sem_seg

        # apply the same transformation to panoptic segmentation
        pan_seg_gt = transforms.apply_segmentation(pan_seg_gt)

        from panopticapi.utils import rgb2id

        pan_seg_gt = rgb2id(pan_seg_gt)

        # Pad image and segmentation label here!
        image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long"))

        if self.size_divisibility > 0:
            image_size = (image.shape[-2], image.shape[-1])
            padding_size = [
                0,
                self.size_divisibility - image_size[1],
                0,
                self.size_divisibility - image_size[0],
            ]
            image = F.pad(image, padding_size, value=128).contiguous()
            if sem_seg_gt is not None:
                sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous()
            pan_seg_gt = F.pad(
                pan_seg_gt, padding_size, value=0
            ).contiguous()  # 0 is the VOID panoptic label

        image_shape = (image.shape[-2], image.shape[-1])  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = image
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = sem_seg_gt.long()

        if "annotations" in dataset_dict:
            raise ValueError("Panoptic segmentation dataset should not have 'annotations'.")

        # Prepare per-category binary masks
        pan_seg_gt = pan_seg_gt.numpy()
        instances = Instances(image_shape)
        classes = []
        masks = []
        for segment_info in segments_info:
            class_id = segment_info["category_id"]
            if not segment_info["iscrowd"]:
                classes.append(class_id)
                masks.append(pan_seg_gt == segment_info["id"])

        classes = np.array(classes)
        instances.gt_classes = torch.tensor(classes, dtype=torch.int64)
        if len(masks) == 0:
            # Some image does not have annotation (all ignored)
            instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1]))
        else:
            masks = BitMasks(
                torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks])
            )
            instances.gt_masks = masks.tensor
            instances.gt_boxes = masks.get_bounding_boxes()

        dataset_dict["instances"] = instances

        return dataset_dict
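
For context on the rgb2id call used by both panoptic mappers: COCO panoptic PNGs encode each segment id across the RGB channels as id = R + 256*G + 256^2*B. A hedged illustration (panopticapi's own rgb2id computes the same thing; the sketch function name and sample pixel are ours):

import numpy as np

def rgb2id_sketch(color: np.ndarray) -> np.ndarray:
    color = color.astype(np.uint32)
    return color[..., 0] + 256 * color[..., 1] + 256 * 256 * color[..., 2]

pixel = np.array([[[21, 1, 0]]], dtype=np.uint8)  # R=21, G=1, B=0
print(rgb2id_sketch(pixel))                       # [[277]] -> segment id 277
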