Ge Zheng committed on
Commit ·
7416b67
1
Parent(s): d4c3ee7
refact (exp, docs) training scales controller and add the corresponding tutorial (#562)
Browse files- README.md +2 -1
- docs/manipulate_training_image_size.md +59 -0
- exps/default/nano.py +2 -1
- exps/default/yolov3.py +0 -60
- exps/default/yolox_tiny.py +2 -1
- exps/example/custom/nano.py +2 -1
- exps/example/yolox_voc/yolox_voc_s.py +2 -1
- yolox/core/trainer.py +1 -1
- yolox/data/datasets/mosaicdetection.py +7 -7
- yolox/exp/yolox_base.py +14 -4
README.md
CHANGED
|
@@ -186,7 +186,8 @@ python tools/eval.py -n yolox-s -c yolox_s.pth -b 1 -d 1 --conf 0.001 --fp16 --
|
|
| 186 |
<details>
|
| 187 |
<summary>Tutorials</summary>
|
| 188 |
|
| 189 |
-
* [Training on custom data](docs/train_custom_data.md)
|
|
|
|
| 190 |
|
| 191 |
</details>
|
| 192 |
|
|
|
|
| 186 |
<details>
|
| 187 |
<summary>Tutorials</summary>
|
| 188 |
|
| 189 |
+
* [Training on custom data](docs/train_custom_data.md)
|
| 190 |
+
* [Manipulating training image size](docs/manipulate_training_image_size.md)
|
| 191 |
|
| 192 |
</details>
|
| 193 |
|
docs/manipulate_training_image_size.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Manipulating Your Training Image Size
|
| 2 |
+
|
| 3 |
+
This tutorial explains how to control your image size when training on your own data.
|
| 4 |
+
|
| 5 |
+
## 1. Introduction
|
| 6 |
+
|
| 7 |
+
There are 3 hyperparameters that control the training size:
|
| 8 |
+
|
| 9 |
+
- self.input_size = (640, 640)
|
| 10 |
+
- self.multiscale_range = 5
|
| 11 |
+
- self.random_size = (14, 26)
|
| 12 |
+
|
| 13 |
+
There is 1 hyperparameter that controls the testing size:
|
| 14 |
+
|
| 15 |
+
- self.test_size = (640, 640)
|
| 16 |
+
|
| 17 |
+
We suggest setting self.input_size to the same value as self.test_size. By default, it is set to (640, 640) for most models and (416, 416) for yolox-tiny and yolox-nano.
|
| 18 |
+
|
| 19 |
+
## 2. Multi Scale Training
|
| 20 |
+
|
| 21 |
+
When training on your custom dataset, you can use multiscale training in 2 ways:
|
| 22 |
+
|
| 23 |
+
1. **【Default】Only specifying the self.input_size and leaving others unchanged.**
|
| 24 |
+
|
| 25 |
+
If so, the actual multiscale sizes range from:
|
| 26 |
+
|
| 27 |
+
[self.input_size[0] - self.multiscale_range\*32, self.input_size[0] + self.multiscale_range\*32]
|
| 28 |
+
|
| 29 |
+
For example, if you only set:
|
| 30 |
+
|
| 31 |
+
```python
|
| 32 |
+
self.input_size = (640, 640)
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
the actual multiscale range is [640 - 5\*32, 640 + 5\*32], i.e., [480, 800].
|
| 36 |
+
|
| 37 |
+
You can modify self.multiscale_range to change the multiscale range.
|
| 38 |
+
|
| 39 |
+
2. **Simultaneously specifying the self.input_size and self.random_size**
|
| 40 |
+
|
| 41 |
+
```python
|
| 42 |
+
self.input_size = (416, 416)
|
| 43 |
+
self.random_size = (10, 20)
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
In this case, the actual multiscale range is [self.random_size[0]\*32, self.random_size[1]\*32], i.e., [320, 640].
|
| 47 |
+
|
| 48 |
+
**Note: You must specify self.input_size because it is used to initialize the resize augmentation in the dataset.**
|
| 49 |
+
|
| 50 |
+
## 3. Single Scale Training
|
| 51 |
+
|
| 52 |
+
If you want to train at a single scale, you need to specify self.input_size and set self.multiscale_range = 0:
|
| 53 |
+
|
| 54 |
+
```python
|
| 55 |
+
self.input_size = (416, 416)
|
| 56 |
+
self.multiscale_range = 0
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
**DO NOT** set the self.random_size.
|
exps/default/nano.py
CHANGED
|
@@ -14,8 +14,9 @@ class Exp(MyExp):
|
|
| 14 |
super(Exp, self).__init__()
|
| 15 |
self.depth = 0.33
|
| 16 |
self.width = 0.25
|
| 17 |
-
self.
|
| 18 |
self.random_size = (10, 20)
|
|
|
|
| 19 |
self.test_size = (416, 416)
|
| 20 |
self.mosaic_prob = 0.5
|
| 21 |
self.enable_mixup = False
|
|
|
|
| 14 |
super(Exp, self).__init__()
|
| 15 |
self.depth = 0.33
|
| 16 |
self.width = 0.25
|
| 17 |
+
self.input_size = (416, 416)
|
| 18 |
self.random_size = (10, 20)
|
| 19 |
+
self.mosaic_scale = (0.5, 1.5)
|
| 20 |
self.test_size = (416, 416)
|
| 21 |
self.mosaic_prob = 0.5
|
| 22 |
self.enable_mixup = False
|
exps/default/yolov3.py
CHANGED
|
@@ -33,63 +33,3 @@ class Exp(MyExp):
|
|
| 33 |
|
| 34 |
return self.model
|
| 35 |
|
| 36 |
-
def get_data_loader(self, batch_size, is_distributed, no_aug=False):
|
| 37 |
-
import torch.distributed as dist
|
| 38 |
-
|
| 39 |
-
from yolox.data import (
|
| 40 |
-
COCODataset,
|
| 41 |
-
DataLoader,
|
| 42 |
-
InfiniteSampler,
|
| 43 |
-
MosaicDetection,
|
| 44 |
-
TrainTransform,
|
| 45 |
-
YoloBatchSampler
|
| 46 |
-
)
|
| 47 |
-
|
| 48 |
-
dataset = COCODataset(
|
| 49 |
-
data_dir='data/COCO/',
|
| 50 |
-
json_file=self.train_ann,
|
| 51 |
-
img_size=self.input_size,
|
| 52 |
-
preproc=TrainTransform(
|
| 53 |
-
rgb_means=(0.485, 0.456, 0.406),
|
| 54 |
-
std=(0.229, 0.224, 0.225),
|
| 55 |
-
max_labels=50
|
| 56 |
-
),
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
dataset = MosaicDetection(
|
| 60 |
-
dataset,
|
| 61 |
-
mosaic=not no_aug,
|
| 62 |
-
img_size=self.input_size,
|
| 63 |
-
preproc=TrainTransform(
|
| 64 |
-
rgb_means=(0.485, 0.456, 0.406),
|
| 65 |
-
std=(0.229, 0.224, 0.225),
|
| 66 |
-
max_labels=120
|
| 67 |
-
),
|
| 68 |
-
degrees=self.degrees,
|
| 69 |
-
translate=self.translate,
|
| 70 |
-
scale=self.scale,
|
| 71 |
-
shear=self.shear,
|
| 72 |
-
perspective=self.perspective,
|
| 73 |
-
)
|
| 74 |
-
|
| 75 |
-
self.dataset = dataset
|
| 76 |
-
|
| 77 |
-
if is_distributed:
|
| 78 |
-
batch_size = batch_size // dist.get_world_size()
|
| 79 |
-
sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
|
| 80 |
-
else:
|
| 81 |
-
sampler = torch.utils.data.RandomSampler(self.dataset)
|
| 82 |
-
|
| 83 |
-
batch_sampler = YoloBatchSampler(
|
| 84 |
-
sampler=sampler,
|
| 85 |
-
batch_size=batch_size,
|
| 86 |
-
drop_last=False,
|
| 87 |
-
input_dimension=self.input_size,
|
| 88 |
-
mosaic=not no_aug
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
-
dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
|
| 92 |
-
dataloader_kwargs["batch_sampler"] = batch_sampler
|
| 93 |
-
train_loader = DataLoader(self.dataset, **dataloader_kwargs)
|
| 94 |
-
|
| 95 |
-
return train_loader
|
|
|
|
| 33 |
|
| 34 |
return self.model
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exps/default/yolox_tiny.py
CHANGED
|
@@ -12,7 +12,8 @@ class Exp(MyExp):
|
|
| 12 |
super(Exp, self).__init__()
|
| 13 |
self.depth = 0.33
|
| 14 |
self.width = 0.375
|
| 15 |
-
self.
|
|
|
|
| 16 |
self.random_size = (10, 20)
|
| 17 |
self.test_size = (416, 416)
|
| 18 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
|
|
|
| 12 |
super(Exp, self).__init__()
|
| 13 |
self.depth = 0.33
|
| 14 |
self.width = 0.375
|
| 15 |
+
self.input_scale = (416, 416)
|
| 16 |
+
self.mosaic_scale = (0.5, 1.5)
|
| 17 |
self.random_size = (10, 20)
|
| 18 |
self.test_size = (416, 416)
|
| 19 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
exps/example/custom/nano.py
CHANGED
|
@@ -14,7 +14,8 @@ class Exp(MyExp):
|
|
| 14 |
super(Exp, self).__init__()
|
| 15 |
self.depth = 0.33
|
| 16 |
self.width = 0.25
|
| 17 |
-
self.
|
|
|
|
| 18 |
self.random_size = (10, 20)
|
| 19 |
self.test_size = (416, 416)
|
| 20 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
|
|
|
| 14 |
super(Exp, self).__init__()
|
| 15 |
self.depth = 0.33
|
| 16 |
self.width = 0.25
|
| 17 |
+
self.input_size = (416, 416)
|
| 18 |
+
self.mosaic_scale = (0.5, 1.5)
|
| 19 |
self.random_size = (10, 20)
|
| 20 |
self.test_size = (416, 416)
|
| 21 |
self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
|
exps/example/yolox_voc/yolox_voc_s.py
CHANGED
|
@@ -49,7 +49,8 @@ class Exp(MyExp):
|
|
| 49 |
preproc=TrainTransform(max_labels=120),
|
| 50 |
degrees=self.degrees,
|
| 51 |
translate=self.translate,
|
| 52 |
-
|
|
|
|
| 53 |
shear=self.shear,
|
| 54 |
perspective=self.perspective,
|
| 55 |
enable_mixup=self.enable_mixup,
|
|
|
|
| 49 |
preproc=TrainTransform(max_labels=120),
|
| 50 |
degrees=self.degrees,
|
| 51 |
translate=self.translate,
|
| 52 |
+
mosaic_scale=self.mosaic_scale,
|
| 53 |
+
mixup_scale=self.mixup_scale,
|
| 54 |
shear=self.shear,
|
| 55 |
perspective=self.perspective,
|
| 56 |
enable_mixup=self.enable_mixup,
|
yolox/core/trainer.py
CHANGED
|
@@ -248,7 +248,7 @@ class Trainer:
|
|
| 248 |
self.meter.clear_meters()
|
| 249 |
|
| 250 |
# random resizing
|
| 251 |
-
if
|
| 252 |
self.input_size = self.exp.random_resize(
|
| 253 |
self.train_loader, self.epoch, self.rank, self.is_distributed
|
| 254 |
)
|
|
|
|
| 248 |
self.meter.clear_meters()
|
| 249 |
|
| 250 |
# random resizing
|
| 251 |
+
if (self.progress_in_iter + 1) % 10 == 0:
|
| 252 |
self.input_size = self.exp.random_resize(
|
| 253 |
self.train_loader, self.epoch, self.rank, self.is_distributed
|
| 254 |
)
|
yolox/data/datasets/mosaicdetection.py
CHANGED
|
@@ -39,9 +39,9 @@ class MosaicDetection(Dataset):
|
|
| 39 |
|
| 40 |
def __init__(
|
| 41 |
self, dataset, img_size, mosaic=True, preproc=None,
|
| 42 |
-
degrees=10.0, translate=0.1,
|
| 43 |
-
shear=2.0, perspective=0.0,
|
| 44 |
-
mosaic_prob=1.0, mixup_prob=1.0, *args
|
| 45 |
):
|
| 46 |
"""
|
| 47 |
|
|
@@ -52,8 +52,8 @@ class MosaicDetection(Dataset):
|
|
| 52 |
preproc (func):
|
| 53 |
degrees (float):
|
| 54 |
translate (float):
|
| 55 |
-
|
| 56 |
-
|
| 57 |
shear (float):
|
| 58 |
perspective (float):
|
| 59 |
enable_mixup (bool):
|
|
@@ -64,10 +64,10 @@ class MosaicDetection(Dataset):
|
|
| 64 |
self.preproc = preproc
|
| 65 |
self.degrees = degrees
|
| 66 |
self.translate = translate
|
| 67 |
-
self.scale =
|
| 68 |
self.shear = shear
|
| 69 |
self.perspective = perspective
|
| 70 |
-
self.mixup_scale =
|
| 71 |
self.enable_mosaic = mosaic
|
| 72 |
self.enable_mixup = enable_mixup
|
| 73 |
self.mosaic_prob = mosaic_prob
|
|
|
|
| 39 |
|
| 40 |
def __init__(
|
| 41 |
self, dataset, img_size, mosaic=True, preproc=None,
|
| 42 |
+
degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
|
| 43 |
+
mixup_scale=(0.5, 1.5), shear=2.0, perspective=0.0,
|
| 44 |
+
enable_mixup=True, mosaic_prob=1.0, mixup_prob=1.0, *args
|
| 45 |
):
|
| 46 |
"""
|
| 47 |
|
|
|
|
| 52 |
preproc (func):
|
| 53 |
degrees (float):
|
| 54 |
translate (float):
|
| 55 |
+
mosaic_scale (tuple):
|
| 56 |
+
mixup_scale (tuple):
|
| 57 |
shear (float):
|
| 58 |
perspective (float):
|
| 59 |
enable_mixup (bool):
|
|
|
|
| 64 |
self.preproc = preproc
|
| 65 |
self.degrees = degrees
|
| 66 |
self.translate = translate
|
| 67 |
+
self.scale = mosaic_scale
|
| 68 |
self.shear = shear
|
| 69 |
self.perspective = perspective
|
| 70 |
+
self.mixup_scale = mixup_scale
|
| 71 |
self.enable_mosaic = mosaic
|
| 72 |
self.enable_mixup = enable_mixup
|
| 73 |
self.mosaic_prob = mosaic_prob
|
yolox/exp/yolox_base.py
CHANGED
|
@@ -25,7 +25,12 @@ class Exp(BaseExp):
|
|
| 25 |
# set worker to 4 for shorter dataloader init time
|
| 26 |
self.data_num_workers = 4
|
| 27 |
self.input_size = (640, 640)
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
self.data_dir = None
|
| 30 |
self.train_ann = "instances_train2017.json"
|
| 31 |
self.val_ann = "instances_val2017.json"
|
|
@@ -35,8 +40,8 @@ class Exp(BaseExp):
|
|
| 35 |
self.mixup_prob = 1.0
|
| 36 |
self.degrees = 10.0
|
| 37 |
self.translate = 0.1
|
| 38 |
-
self.
|
| 39 |
-
self.
|
| 40 |
self.shear = 2.0
|
| 41 |
self.perspective = 0.0
|
| 42 |
self.enable_mixup = True
|
|
@@ -116,7 +121,8 @@ class Exp(BaseExp):
|
|
| 116 |
preproc=TrainTransform(max_labels=120),
|
| 117 |
degrees=self.degrees,
|
| 118 |
translate=self.translate,
|
| 119 |
-
|
|
|
|
| 120 |
shear=self.shear,
|
| 121 |
perspective=self.perspective,
|
| 122 |
enable_mixup=self.enable_mixup,
|
|
@@ -154,6 +160,10 @@ class Exp(BaseExp):
|
|
| 154 |
|
| 155 |
if rank == 0:
|
| 156 |
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
size = random.randint(*self.random_size)
|
| 158 |
size = (int(32 * size), 32 * int(size * size_factor))
|
| 159 |
tensor[0] = size[0]
|
|
|
|
| 25 |
# set worker to 4 for shorter dataloader init time
|
| 26 |
self.data_num_workers = 4
|
| 27 |
self.input_size = (640, 640)
|
| 28 |
+
# Actual multiscale ranges: [640-5*32, 640+5*32].
|
| 29 |
+
# To disable multiscale training, set the
|
| 30 |
+
# self.multiscale_range to 0.
|
| 31 |
+
self.multiscale_range = 5
|
| 32 |
+
# You can uncomment this line to specify a multiscale range
|
| 33 |
+
# self.random_size = (14, 26)
|
| 34 |
self.data_dir = None
|
| 35 |
self.train_ann = "instances_train2017.json"
|
| 36 |
self.val_ann = "instances_val2017.json"
|
|
|
|
| 40 |
self.mixup_prob = 1.0
|
| 41 |
self.degrees = 10.0
|
| 42 |
self.translate = 0.1
|
| 43 |
+
self.mosaic_scale = (0.1, 2)
|
| 44 |
+
self.mixup_scale = (0.5, 1.5)
|
| 45 |
self.shear = 2.0
|
| 46 |
self.perspective = 0.0
|
| 47 |
self.enable_mixup = True
|
|
|
|
| 121 |
preproc=TrainTransform(max_labels=120),
|
| 122 |
degrees=self.degrees,
|
| 123 |
translate=self.translate,
|
| 124 |
+
mosaic_scale=self.mosaic_scale,
|
| 125 |
+
mixup_scale=self.mixup_scale,
|
| 126 |
shear=self.shear,
|
| 127 |
perspective=self.perspective,
|
| 128 |
enable_mixup=self.enable_mixup,
|
|
|
|
| 160 |
|
| 161 |
if rank == 0:
|
| 162 |
size_factor = self.input_size[1] * 1.0 / self.input_size[0]
|
| 163 |
+
if not hasattr(self, 'random_size'):
|
| 164 |
+
min_size = int(self.input_size[0] / 32) - self.multiscale_range
|
| 165 |
+
max_size = int(self.input_size[0] / 32) + self.multiscale_range
|
| 166 |
+
self.random_size = (min_size, max_size)
|
| 167 |
size = random.randint(*self.random_size)
|
| 168 |
size = (int(32 * size), 32 * int(size * size_factor))
|
| 169 |
tensor[0] = size[0]
|