HilmiZr commited on
Commit
bc6a1aa
·
1 Parent(s): 53132c1

added: ultralytics folder

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ultralytics/__init__.py +8 -7
  2. ultralytics/cfg/__init__.py +465 -0
  3. ultralytics/cfg/datasets/Argoverse.yaml +73 -0
  4. ultralytics/cfg/datasets/DOTAv2.yaml +37 -0
  5. ultralytics/cfg/datasets/GlobalWheat2020.yaml +54 -0
  6. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  7. ultralytics/cfg/datasets/Objects365.yaml +443 -0
  8. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  9. ultralytics/cfg/datasets/VOC.yaml +100 -0
  10. ultralytics/cfg/datasets/VisDrone.yaml +73 -0
  11. ultralytics/cfg/datasets/coco-pose.yaml +38 -0
  12. ultralytics/cfg/datasets/coco.yaml +115 -0
  13. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  14. ultralytics/cfg/datasets/coco128.yaml +101 -0
  15. ultralytics/cfg/datasets/coco8-pose.yaml +25 -0
  16. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  17. ultralytics/cfg/datasets/coco8.yaml +101 -0
  18. ultralytics/cfg/datasets/open-images-v7.yaml +661 -0
  19. ultralytics/cfg/datasets/tiger-pose.yaml +24 -0
  20. ultralytics/cfg/datasets/xView.yaml +153 -0
  21. ultralytics/cfg/default.yaml +119 -0
  22. ultralytics/cfg/models/README.md +40 -0
  23. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +50 -0
  24. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +42 -0
  25. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +42 -0
  26. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +54 -0
  27. ultralytics/cfg/models/v3/yolov3-spp.yaml +48 -0
  28. ultralytics/cfg/models/v3/yolov3-tiny.yaml +39 -0
  29. ultralytics/cfg/models/v3/yolov3.yaml +48 -0
  30. ultralytics/cfg/models/v5/yolov5-p6.yaml +61 -0
  31. ultralytics/cfg/models/v5/yolov5.yaml +50 -0
  32. ultralytics/cfg/models/v6/yolov6.yaml +53 -0
  33. ultralytics/cfg/models/v8/yolov8-cls.yaml +29 -0
  34. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +54 -0
  35. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +56 -0
  36. ultralytics/cfg/models/v8/yolov8-ghost.yaml +47 -0
  37. ultralytics/cfg/models/v8/yolov8-p2.yaml +54 -0
  38. ultralytics/cfg/models/v8/yolov8-p6.yaml +56 -0
  39. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +57 -0
  40. ultralytics/cfg/models/v8/yolov8-pose.yaml +47 -0
  41. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +46 -0
  42. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +56 -0
  43. ultralytics/cfg/models/v8/yolov8-seg.yaml +46 -0
  44. ultralytics/cfg/models/v8/yolov8.yaml +46 -0
  45. ultralytics/cfg/trackers/botsort.yaml +18 -0
  46. ultralytics/cfg/trackers/bytetrack.yaml +11 -0
  47. ultralytics/data/__init__.py +8 -0
  48. ultralytics/data/annotator.py +50 -0
  49. ultralytics/data/augment.py +1107 -0
  50. ultralytics/data/base.py +304 -0
ultralytics/__init__.py CHANGED
@@ -1,11 +1,12 @@
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
 
3
- __version__ = '8.0.107'
4
 
5
- from ultralytics.hub import start
6
- from ultralytics.vit.rtdetr import RTDETR
7
- from ultralytics.vit.sam import SAM
8
- from ultralytics.yolo.engine.model import YOLO
9
- from ultralytics.yolo.utils.checks import check_yolo as checks
 
10
 
11
- __all__ = '__version__', 'YOLO', 'SAM', 'RTDETR', 'checks', 'start' # allow simpler import
 
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
 
3
+ __version__ = '8.0.225'
4
 
5
+ from ultralytics.models import RTDETR, SAM, YOLO
6
+ from ultralytics.models.fastsam import FastSAM
7
+ from ultralytics.models.nas import NAS
8
+ from ultralytics.utils import SETTINGS as settings
9
+ from ultralytics.utils.checks import check_yolo as checks
10
+ from ultralytics.utils.downloads import download
11
 
12
+ __all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings'
ultralytics/cfg/__init__.py ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import contextlib
4
+ import shutil
5
+ import sys
6
+ from pathlib import Path
7
+ from types import SimpleNamespace
8
+ from typing import Dict, List, Union
9
+
10
+ from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT, RUNS_DIR,
11
+ SETTINGS, SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
12
+ colorstr, deprecation_warn, yaml_load, yaml_print)
13
+
14
# Define valid tasks and modes
MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
TASKS = 'detect', 'segment', 'classify', 'pose'
# Default dataset per task (used when 'data' is not given on the CLI)
TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
# Default pretrained weights per task (used when 'model' is not given on the CLI)
TASK2MODEL = {
    'detect': 'yolov8n.pt',
    'segment': 'yolov8n-seg.pt',
    'classify': 'yolov8n-cls.pt',
    'pose': 'yolov8n-pose.pt'}
# Primary metric key per task (used by benchmarking/validation reporting)
TASK2METRIC = {
    'detect': 'metrics/mAP50-95(B)',
    'segment': 'metrics/mAP50-95(M)',
    'classify': 'metrics/accuracy_top1',
    'pose': 'metrics/mAP50-95(P)'}

# Help text printed for 'yolo help', bad arguments, or an empty command line
CLI_HELP_MSG = \
    f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {TASKS}
                MODE (required) is one of {MODES}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    5. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.ultralytics.com
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
    """

# Define keys for arg type checks (used by get_cfg() to validate user-supplied values)
CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
                     'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
                     'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction')  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
                'line_width', 'workspace', 'nbs', 'save_period')
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
                 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
                 'save_frames', 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks',
                 'show_boxes', 'keras', 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
76
+
77
+
78
def cfg2dict(cfg):
    """
    Convert a configuration object to a plain dictionary.

    Accepts a YAML file path (str | Path), a SimpleNamespace, or an existing dict;
    dicts pass through unchanged.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary.

    Returns:
        cfg (dict): Configuration object in dictionary format.
    """
    if isinstance(cfg, SimpleNamespace):
        return vars(cfg)  # namespace attributes -> dict
    if isinstance(cfg, (str, Path)):
        return yaml_load(cfg)  # read the YAML file into a dict
    return cfg
93
+
94
+
95
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load and merge configuration data from a file or dictionary.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Configuration data.
        overrides (str | Dict | optional): Overrides in the form of a file name or a dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace.

    Raises:
        TypeError: If a known key has a value of the wrong type (float/int/bool keys).
        ValueError: If a fraction key is outside the range [0.0, 1.0].
    """
    cfg = cfg2dict(cfg)

    # Merge overrides (override values win over base cfg values)
    if overrides:
        overrides = cfg2dict(overrides)
        if 'save_dir' not in cfg:
            overrides.pop('save_dir', None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name: coerce to str so they behave as directory names
    for k in 'project', 'name':
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
    if cfg.get('name') == 'model':  # assign model to 'name' arg
        cfg['name'] = cfg.get('model', '').split('.')[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks against the CFG_*_KEYS groups defined at module level
    for k, v in cfg.items():
        if v is not None:  # None values may be from optional args
            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
            elif k in CFG_FRACTION_KEYS:
                if not isinstance(v, (int, float)):
                    raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                    f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
                if not (0.0 <= v <= 1.0):
                    raise ValueError(f"'{k}={v}' is an invalid value. "
                                     f"Valid '{k}' values are between 0.0 and 1.0.")
            elif k in CFG_INT_KEYS and not isinstance(v, int):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"'{k}' must be an int (i.e. '{k}=8')")
            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')")

    # Return instance (iterable namespace so callers can use both attribute and dict-style access)
    return IterableSimpleNamespace(**cfg)
146
+
147
+
148
def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

    explicit = getattr(args, 'save_dir', None)
    if explicit:
        # An explicit save_dir override always wins
        return Path(explicit)

    from ultralytics.utils.files import increment_path

    # Compose project/name and auto-increment (runs/task/name, name2, name3, ...);
    # non-zero ranks reuse the rank-0 directory rather than incrementing again
    project = args.project or (ROOT.parent / 'tests/tmp/runs' if TESTS_RUNNING else RUNS_DIR) / args.task
    run_name = name or args.name or f'{args.mode}'
    allow_existing = args.exist_ok if RANK in (-1, 0) else True
    return Path(increment_path(Path(project) / run_name, exist_ok=allow_existing))
161
+
162
+
163
+ def _handle_deprecation(custom):
164
+ """Hardcoded function to handle deprecated config keys."""
165
+
166
+ for key in custom.copy().keys():
167
+ if key == 'boxes':
168
+ deprecation_warn(key, 'show_boxes')
169
+ custom['show_boxes'] = custom.pop('boxes')
170
+ if key == 'hide_labels':
171
+ deprecation_warn(key, 'show_labels')
172
+ custom['show_labels'] = custom.pop('hide_labels') == 'False'
173
+ if key == 'hide_conf':
174
+ deprecation_warn(key, 'show_conf')
175
+ custom['show_conf'] = custom.pop('hide_conf') == 'False'
176
+ if key == 'line_thickness':
177
+ deprecation_warn(key, 'line_width')
178
+ custom['line_width'] = custom.pop('line_thickness')
179
+
180
+ return custom
181
+
182
+
183
def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    Verify that every key in a custom configuration dict exists in the base configuration dict.

    Any unknown key aborts the program with a SyntaxError that lists close matches from the
    base dict to help the user correct a typo.

    Args:
        custom (dict): a dictionary of custom configuration options
        base (dict): a dictionary of base configuration options
        e (Error, optional): An optional error that is passed by the calling function.
    """
    custom = _handle_deprecation(custom)
    base_keys = set(base.keys())
    unknown = [k for k in set(custom.keys()) if k not in base_keys]
    if not unknown:
        return

    from difflib import get_close_matches

    message = ''
    for bad_key in unknown:
        candidates = get_close_matches(bad_key, base_keys)  # key list
        candidates = [f'{c}={base[c]}' if base.get(c) is not None else c for c in candidates]
        hint = f'Similar arguments are i.e. {candidates}.' if candidates else ''
        message += f"'{colorstr('red', 'bold', bad_key)}' is not a valid YOLO argument. {hint}\n"
    raise SyntaxError(message + CLI_HELP_MSG) from e
206
+
207
+
208
def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merge arguments around isolated '=' signs in a list of strings.

    Handles the three split forms ['arg', '=', 'val'], ['arg=', 'val'] and ['arg', '=val'],
    producing a single 'arg=val' token for each.

    Fix: the original implementation deleted elements from `args` while iterating it with
    enumerate — a modify-while-iterating anti-pattern that also mutated the caller's list.
    This version walks an explicit index over the unmodified input, so the caller's list
    is left untouched while producing identical output.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        List[str]: A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    i, n = 0, len(args)
    while i < n:
        arg = args[i]
        if arg == '=' and new_args and i + 1 < n:  # merge ['arg', '=', 'val']
            new_args[-1] += f'={args[i + 1]}'
            i += 2
        elif arg.endswith('=') and i + 1 < n and '=' not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f'{arg}{args[i + 1]}')
            i += 2
        elif arg.startswith('=') and new_args:  # merge ['arg', '=val']
            new_args[-1] += arg
            i += 1
        else:
            new_args.append(arg)
            i += 1
    return new_args
232
+
233
+
234
def handle_yolo_hub(args: List[str]) -> None:
    """
    Handle Ultralytics HUB command-line interface (CLI) commands.

    Processes HUB authentication commands: 'login' (optionally followed by an API key)
    and 'logout'. Any other first argument is silently ignored.

    Args:
        args (List[str]): A list of command line arguments

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from ultralytics import hub

    command = args[0]
    if command == 'login':
        # Use the API key if one was supplied, otherwise pass an empty string
        hub.login(args[1] if len(args) > 1 else '')
    elif command == 'logout':
        hub.logout()
258
+
259
+
260
def handle_yolo_settings(args: List[str]) -> None:
    """
    Handle YOLO settings command-line interface (CLI) commands.

    This function processes YOLO settings CLI commands such as reset.
    It should be called when executing a script with arguments related to YOLO settings management.

    Args:
        args (List[str]): A list of command line arguments for YOLO settings management.
            Either ['reset'] or any number of 'key=value' pairs to store.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = 'https://docs.ultralytics.com/quickstart/#ultralytics-settings'  # help URL
    try:
        if any(args):
            if args[0] == 'reset':
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info('Settings reset successfully')  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)  # reject unknown setting names before saving
                SETTINGS.update(new)

        LOGGER.info(f'💡 Learn about settings at {url}')
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        # Best-effort: settings-management failures are logged as warnings, never raised
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
291
+
292
+
293
def parse_key_value_pair(pair):
    """
    Parse one 'key=value' string and return the (key, value) tuple.

    The value is converted to its underlying Python type via smart_value().

    Args:
        pair (str): A string of the form 'key=value'; the value may itself contain '=' signs.

    Returns:
        (tuple): (key, converted value).

    Raises:
        AssertionError: If the value part is empty, e.g. 'key='.
    """
    k, v = pair.split('=', 1)  # split on first '=' sign only, so values may contain '='
    k, v = k.strip(), v.strip()  # remove spaces
    if not v:
        # Raise explicitly instead of `assert`, which is stripped under `python -O`.
        # AssertionError is kept so entrypoint()'s except clause still catches it.
        raise AssertionError(f"missing '{k}' value")
    return k, smart_value(v)
299
+
300
+
301
def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, None, etc."""
    lowered = v.lower()
    if lowered == 'none':
        return None
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    # SECURITY NOTE: eval() executes arbitrary Python expressions; this is only acceptable
    # because input comes from the user's own local CLI, never from untrusted sources.
    with contextlib.suppress(Exception):
        return eval(v)  # e.g. '3' -> 3, '0.5' -> 0.5; non-literals fall through
    return v  # unconvertible strings are returned unchanged
314
+
315
+
316
def entrypoint(debug=''):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg

    Args:
        debug (str): Optional full command string (e.g. 'yolo predict model=yolov8n.pt') used for
            testing instead of reading sys.argv.
    """
    args = (debug.split(' ') if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    # Special commands that run a handler and exit instead of invoking a model
    special = {
        'help': lambda: LOGGER.info(CLI_HELP_MSG),
        'checks': checks.collect_system_info,
        'version': lambda: LOGGER.info(__version__),
        'settings': lambda: handle_yolo_settings(args[1:]),
        'cfg': lambda: yaml_print(DEFAULT_CFG_PATH),
        'hub': lambda: handle_yolo_hub(args[1:]),
        'login': lambda: handle_yolo_hub(args),
        'copy-cfg': copy_default_cfg}
    # Union of every name the CLI accepts, used for typo suggestions in check_dict_alignment()
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # first-letter shorthand, i.e. 'h' for 'help'
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith('s')})  # singular, i.e. 'check'
    special = {**special, **{f'-{k}': v for k, v in special.items()}, **{f'--{k}': v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        # Tolerate common CLI mistakes: leading dashes and trailing commas
        if a.startswith('--'):
            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(','):
            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if '=' in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == 'cfg' and v is not None:  # custom.yaml passed
                    LOGGER.info(f'Overriding {DEFAULT_CFG_PATH} with {v}')
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != 'cfg'}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                # Malformed pair: report it with close-match suggestions and abort
                check_dict_alignment(full_args_dict, {a: ''}, e)

        elif a in TASKS:
            overrides['task'] = a
        elif a in MODES:
            overrides['mode'] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                              f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}")
        else:
            check_dict_alignment(full_args_dict, {a: ''})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode: fall back to the configured default (or 'predict') when omitted
    mode = overrides.get('mode')
    if mode is None:
        mode = DEFAULT_CFG.mode or 'predict'
        LOGGER.warning(f"WARNING ⚠️ 'mode' is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task: optional; when given without a model, selects the default model for that task
    task = overrides.pop('task', None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if 'model' not in overrides:
            overrides['model'] = TASK2MODEL[task]

    # Model: instantiate the right model class based on the checkpoint filename stem
    model = overrides.pop('model', DEFAULT_CFG.model)
    if model is None:
        model = 'yolov8n.pt'
        LOGGER.warning(f"WARNING ⚠️ 'model' is missing. Using default 'model={model}'.")
    overrides['model'] = model
    stem = Path(model).stem.lower()
    if 'rtdetr' in stem:  # guess architecture
        from ultralytics import RTDETR
        model = RTDETR(model)  # no task argument
    elif 'fastsam' in stem:
        # NOTE: 'fastsam' must be checked before 'sam' since 'sam' is a substring of 'fastsam'
        from ultralytics import FastSAM
        model = FastSAM(model)
    elif 'sam' in stem:
        from ultralytics import SAM
        model = SAM(model)
    else:
        from ultralytics import YOLO
        model = YOLO(model, task=task)
    if isinstance(overrides.get('pretrained'), str):
        model.load(overrides['pretrained'])

    # Task Update: the model's own task always wins over a conflicting CLI task
    if task != model.task:
        if task:
            LOGGER.warning(f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                           f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model.")
        task = model.task

    # Mode: fill in mode-specific required args ('source', 'data', 'format') with defaults
    if mode in ('predict', 'track') and 'source' not in overrides:
        overrides['source'] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.")
    elif mode in ('train', 'val'):
        if 'data' not in overrides and 'resume' not in overrides:
            overrides['data'] = TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' is missing. Using default 'data={overrides['data']}'.")
    elif mode == 'export':
        if 'format' not in overrides:
            overrides['format'] = DEFAULT_CFG.format or 'torchscript'
            LOGGER.warning(f"WARNING ⚠️ 'format' is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f'💡 Learn more at https://docs.ultralytics.com/modes/{mode}')
452
+
453
+
454
+ # Special modes --------------------------------------------------------------------------------------------------------
455
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    destination = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml')
    shutil.copy2(DEFAULT_CFG_PATH, destination)  # copy2 preserves file metadata
    LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {destination}\n'
                f"Example YOLO command with this new custom cfg:\n yolo cfg='{destination}' imgsz=320 batch=8")
461
+
462
+
463
if __name__ == '__main__':
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    # With an empty debug string, real CLI arguments are read from sys.argv instead.
    entrypoint(debug='')
ultralytics/cfg/datasets/Argoverse.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
3
+ # Example usage: yolo train data=Argoverse.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── Argoverse ← downloads here (31.5 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/Argoverse # dataset root dir
12
+ train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
13
+ val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
14
+ test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: bus
23
+ 5: truck
24
+ 6: traffic_light
25
+ 7: stop_sign
26
+
27
+
28
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
29
+ download: |
30
+ import json
31
+ from tqdm import tqdm
32
+ from ultralytics.utils.downloads import download
33
+ from pathlib import Path
34
+
35
+ def argoverse2yolo(set):
36
+ labels = {}
37
+ a = json.load(open(set, "rb"))
38
+ for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
39
+ img_id = annot['image_id']
40
+ img_name = a['images'][img_id]['name']
41
+ img_label_name = f'{img_name[:-3]}txt'
42
+
43
+ cls = annot['category_id'] # instance class id
44
+ x_center, y_center, width, height = annot['bbox']
45
+ x_center = (x_center + width / 2) / 1920.0 # offset and scale
46
+ y_center = (y_center + height / 2) / 1200.0 # offset and scale
47
+ width /= 1920.0 # scale
48
+ height /= 1200.0 # scale
49
+
50
+ img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
51
+ if not img_dir.exists():
52
+ img_dir.mkdir(parents=True, exist_ok=True)
53
+
54
+ k = str(img_dir / img_label_name)
55
+ if k not in labels:
56
+ labels[k] = []
57
+ labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
58
+
59
+ for k in labels:
60
+ with open(k, "w") as f:
61
+ f.writelines(labels[k])
62
+
63
+
64
+ # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
65
+ dir = Path(yaml['path']) # dataset root dir
66
+ urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
67
+ download(urls, dir=dir)
68
+
69
+ # Convert
70
+ annotations_dir = 'Argoverse-HD/annotations/'
71
+ (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
72
+ for d in "train.json", "val.json":
73
+ argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels
ultralytics/cfg/datasets/DOTAv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
3
+ # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── dota2 ← downloads here (2GB)
8
+
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/DOTAv2 # dataset root dir
11
+ train: images/train # train images (relative to 'path') 1411 images
12
+ val: images/val # val images (relative to 'path') 458 images
13
+ test: images/test # test images (optional) 937 images
14
+
15
+ # Classes for DOTA 2.0
16
+ names:
17
+ 0: plane
18
+ 1: ship
19
+ 2: storage tank
20
+ 3: baseball diamond
21
+ 4: tennis court
22
+ 5: basketball court
23
+ 6: ground track field
24
+ 7: harbor
25
+ 8: bridge
26
+ 9: large vehicle
27
+ 10: small vehicle
28
+ 11: helicopter
29
+ 12: roundabout
30
+ 13: soccer ball field
31
+ 14: swimming pool
32
+ 15: container crane
33
+ 16: airport
34
+ 17: helipad
35
+
36
+ # Download script/URL (optional)
37
+ download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip
ultralytics/cfg/datasets/GlobalWheat2020.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
3
+ # Example usage: yolo train data=GlobalWheat2020.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── GlobalWheat2020 ← downloads here (7.0 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/GlobalWheat2020 # dataset root dir
12
+ train: # train images (relative to 'path') 3422 images
13
+ - images/arvalis_1
14
+ - images/arvalis_2
15
+ - images/arvalis_3
16
+ - images/ethz_1
17
+ - images/rres_1
18
+ - images/inrae_1
19
+ - images/usask_1
20
+ val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
21
+ - images/ethz_1
22
+ test: # test images (optional) 1276 images
23
+ - images/utokyo_1
24
+ - images/utokyo_2
25
+ - images/nau_1
26
+ - images/uq_1
27
+
28
+ # Classes
29
+ names:
30
+ 0: wheat_head
31
+
32
+
33
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
34
+ download: |
35
+ from ultralytics.utils.downloads import download
36
+ from pathlib import Path
37
+
38
+ # Download
39
+ dir = Path(yaml['path']) # dataset root dir
40
+ urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
41
+ 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
42
+ download(urls, dir=dir)
43
+
44
+ # Make Directories
45
+ for p in 'annotations', 'images', 'labels':
46
+ (dir / p).mkdir(parents=True, exist_ok=True)
47
+
48
+ # Move
49
+ for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
50
+ 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
51
+ (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p) # move to /images
52
+ f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json') # json file
53
+ if f.exists():
54
+ f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
ultralytics/cfg/datasets/ImageNet.yaml ADDED
@@ -0,0 +1,2025 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
3
+ # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
4
+ # Example usage: yolo train task=classify data=imagenet
5
+ # parent
6
+ # ├── ultralytics
7
+ # └── datasets
8
+ # └── imagenet ← downloads here (144 GB)
9
+
10
+
11
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12
+ path: ../datasets/imagenet # dataset root dir
13
+ train: train # train images (relative to 'path') 1281167 images
14
+ val: val # val images (relative to 'path') 50000 images
15
+ test: # test images (optional)
16
+
17
+ # Classes
18
+ names:
19
+ 0: tench
20
+ 1: goldfish
21
+ 2: great white shark
22
+ 3: tiger shark
23
+ 4: hammerhead shark
24
+ 5: electric ray
25
+ 6: stingray
26
+ 7: cock
27
+ 8: hen
28
+ 9: ostrich
29
+ 10: brambling
30
+ 11: goldfinch
31
+ 12: house finch
32
+ 13: junco
33
+ 14: indigo bunting
34
+ 15: American robin
35
+ 16: bulbul
36
+ 17: jay
37
+ 18: magpie
38
+ 19: chickadee
39
+ 20: American dipper
40
+ 21: kite
41
+ 22: bald eagle
42
+ 23: vulture
43
+ 24: great grey owl
44
+ 25: fire salamander
45
+ 26: smooth newt
46
+ 27: newt
47
+ 28: spotted salamander
48
+ 29: axolotl
49
+ 30: American bullfrog
50
+ 31: tree frog
51
+ 32: tailed frog
52
+ 33: loggerhead sea turtle
53
+ 34: leatherback sea turtle
54
+ 35: mud turtle
55
+ 36: terrapin
56
+ 37: box turtle
57
+ 38: banded gecko
58
+ 39: green iguana
59
+ 40: Carolina anole
60
+ 41: desert grassland whiptail lizard
61
+ 42: agama
62
+ 43: frilled-necked lizard
63
+ 44: alligator lizard
64
+ 45: Gila monster
65
+ 46: European green lizard
66
+ 47: chameleon
67
+ 48: Komodo dragon
68
+ 49: Nile crocodile
69
+ 50: American alligator
70
+ 51: triceratops
71
+ 52: worm snake
72
+ 53: ring-necked snake
73
+ 54: eastern hog-nosed snake
74
+ 55: smooth green snake
75
+ 56: kingsnake
76
+ 57: garter snake
77
+ 58: water snake
78
+ 59: vine snake
79
+ 60: night snake
80
+ 61: boa constrictor
81
+ 62: African rock python
82
+ 63: Indian cobra
83
+ 64: green mamba
84
+ 65: sea snake
85
+ 66: Saharan horned viper
86
+ 67: eastern diamondback rattlesnake
87
+ 68: sidewinder
88
+ 69: trilobite
89
+ 70: harvestman
90
+ 71: scorpion
91
+ 72: yellow garden spider
92
+ 73: barn spider
93
+ 74: European garden spider
94
+ 75: southern black widow
95
+ 76: tarantula
96
+ 77: wolf spider
97
+ 78: tick
98
+ 79: centipede
99
+ 80: black grouse
100
+ 81: ptarmigan
101
+ 82: ruffed grouse
102
+ 83: prairie grouse
103
+ 84: peacock
104
+ 85: quail
105
+ 86: partridge
106
+ 87: grey parrot
107
+ 88: macaw
108
+ 89: sulphur-crested cockatoo
109
+ 90: lorikeet
110
+ 91: coucal
111
+ 92: bee eater
112
+ 93: hornbill
113
+ 94: hummingbird
114
+ 95: jacamar
115
+ 96: toucan
116
+ 97: duck
117
+ 98: red-breasted merganser
118
+ 99: goose
119
+ 100: black swan
120
+ 101: tusker
121
+ 102: echidna
122
+ 103: platypus
123
+ 104: wallaby
124
+ 105: koala
125
+ 106: wombat
126
+ 107: jellyfish
127
+ 108: sea anemone
128
+ 109: brain coral
129
+ 110: flatworm
130
+ 111: nematode
131
+ 112: conch
132
+ 113: snail
133
+ 114: slug
134
+ 115: sea slug
135
+ 116: chiton
136
+ 117: chambered nautilus
137
+ 118: Dungeness crab
138
+ 119: rock crab
139
+ 120: fiddler crab
140
+ 121: red king crab
141
+ 122: American lobster
142
+ 123: spiny lobster
143
+ 124: crayfish
144
+ 125: hermit crab
145
+ 126: isopod
146
+ 127: white stork
147
+ 128: black stork
148
+ 129: spoonbill
149
+ 130: flamingo
150
+ 131: little blue heron
151
+ 132: great egret
152
+ 133: bittern
153
+ 134: crane (bird)
154
+ 135: limpkin
155
+ 136: common gallinule
156
+ 137: American coot
157
+ 138: bustard
158
+ 139: ruddy turnstone
159
+ 140: dunlin
160
+ 141: common redshank
161
+ 142: dowitcher
162
+ 143: oystercatcher
163
+ 144: pelican
164
+ 145: king penguin
165
+ 146: albatross
166
+ 147: grey whale
167
+ 148: killer whale
168
+ 149: dugong
169
+ 150: sea lion
170
+ 151: Chihuahua
171
+ 152: Japanese Chin
172
+ 153: Maltese
173
+ 154: Pekingese
174
+ 155: Shih Tzu
175
+ 156: King Charles Spaniel
176
+ 157: Papillon
177
+ 158: toy terrier
178
+ 159: Rhodesian Ridgeback
179
+ 160: Afghan Hound
180
+ 161: Basset Hound
181
+ 162: Beagle
182
+ 163: Bloodhound
183
+ 164: Bluetick Coonhound
184
+ 165: Black and Tan Coonhound
185
+ 166: Treeing Walker Coonhound
186
+ 167: English foxhound
187
+ 168: Redbone Coonhound
188
+ 169: borzoi
189
+ 170: Irish Wolfhound
190
+ 171: Italian Greyhound
191
+ 172: Whippet
192
+ 173: Ibizan Hound
193
+ 174: Norwegian Elkhound
194
+ 175: Otterhound
195
+ 176: Saluki
196
+ 177: Scottish Deerhound
197
+ 178: Weimaraner
198
+ 179: Staffordshire Bull Terrier
199
+ 180: American Staffordshire Terrier
200
+ 181: Bedlington Terrier
201
+ 182: Border Terrier
202
+ 183: Kerry Blue Terrier
203
+ 184: Irish Terrier
204
+ 185: Norfolk Terrier
205
+ 186: Norwich Terrier
206
+ 187: Yorkshire Terrier
207
+ 188: Wire Fox Terrier
208
+ 189: Lakeland Terrier
209
+ 190: Sealyham Terrier
210
+ 191: Airedale Terrier
211
+ 192: Cairn Terrier
212
+ 193: Australian Terrier
213
+ 194: Dandie Dinmont Terrier
214
+ 195: Boston Terrier
215
+ 196: Miniature Schnauzer
216
+ 197: Giant Schnauzer
217
+ 198: Standard Schnauzer
218
+ 199: Scottish Terrier
219
+ 200: Tibetan Terrier
220
+ 201: Australian Silky Terrier
221
+ 202: Soft-coated Wheaten Terrier
222
+ 203: West Highland White Terrier
223
+ 204: Lhasa Apso
224
+ 205: Flat-Coated Retriever
225
+ 206: Curly-coated Retriever
226
+ 207: Golden Retriever
227
+ 208: Labrador Retriever
228
+ 209: Chesapeake Bay Retriever
229
+ 210: German Shorthaired Pointer
230
+ 211: Vizsla
231
+ 212: English Setter
232
+ 213: Irish Setter
233
+ 214: Gordon Setter
234
+ 215: Brittany
235
+ 216: Clumber Spaniel
236
+ 217: English Springer Spaniel
237
+ 218: Welsh Springer Spaniel
238
+ 219: Cocker Spaniels
239
+ 220: Sussex Spaniel
240
+ 221: Irish Water Spaniel
241
+ 222: Kuvasz
242
+ 223: Schipperke
243
+ 224: Groenendael
244
+ 225: Malinois
245
+ 226: Briard
246
+ 227: Australian Kelpie
247
+ 228: Komondor
248
+ 229: Old English Sheepdog
249
+ 230: Shetland Sheepdog
250
+ 231: collie
251
+ 232: Border Collie
252
+ 233: Bouvier des Flandres
253
+ 234: Rottweiler
254
+ 235: German Shepherd Dog
255
+ 236: Dobermann
256
+ 237: Miniature Pinscher
257
+ 238: Greater Swiss Mountain Dog
258
+ 239: Bernese Mountain Dog
259
+ 240: Appenzeller Sennenhund
260
+ 241: Entlebucher Sennenhund
261
+ 242: Boxer
262
+ 243: Bullmastiff
263
+ 244: Tibetan Mastiff
264
+ 245: French Bulldog
265
+ 246: Great Dane
266
+ 247: St. Bernard
267
+ 248: husky
268
+ 249: Alaskan Malamute
269
+ 250: Siberian Husky
270
+ 251: Dalmatian
271
+ 252: Affenpinscher
272
+ 253: Basenji
273
+ 254: pug
274
+ 255: Leonberger
275
+ 256: Newfoundland
276
+ 257: Pyrenean Mountain Dog
277
+ 258: Samoyed
278
+ 259: Pomeranian
279
+ 260: Chow Chow
280
+ 261: Keeshond
281
+ 262: Griffon Bruxellois
282
+ 263: Pembroke Welsh Corgi
283
+ 264: Cardigan Welsh Corgi
284
+ 265: Toy Poodle
285
+ 266: Miniature Poodle
286
+ 267: Standard Poodle
287
+ 268: Mexican hairless dog
288
+ 269: grey wolf
289
+ 270: Alaskan tundra wolf
290
+ 271: red wolf
291
+ 272: coyote
292
+ 273: dingo
293
+ 274: dhole
294
+ 275: African wild dog
295
+ 276: hyena
296
+ 277: red fox
297
+ 278: kit fox
298
+ 279: Arctic fox
299
+ 280: grey fox
300
+ 281: tabby cat
301
+ 282: tiger cat
302
+ 283: Persian cat
303
+ 284: Siamese cat
304
+ 285: Egyptian Mau
305
+ 286: cougar
306
+ 287: lynx
307
+ 288: leopard
308
+ 289: snow leopard
309
+ 290: jaguar
310
+ 291: lion
311
+ 292: tiger
312
+ 293: cheetah
313
+ 294: brown bear
314
+ 295: American black bear
315
+ 296: polar bear
316
+ 297: sloth bear
317
+ 298: mongoose
318
+ 299: meerkat
319
+ 300: tiger beetle
320
+ 301: ladybug
321
+ 302: ground beetle
322
+ 303: longhorn beetle
323
+ 304: leaf beetle
324
+ 305: dung beetle
325
+ 306: rhinoceros beetle
326
+ 307: weevil
327
+ 308: fly
328
+ 309: bee
329
+ 310: ant
330
+ 311: grasshopper
331
+ 312: cricket
332
+ 313: stick insect
333
+ 314: cockroach
334
+ 315: mantis
335
+ 316: cicada
336
+ 317: leafhopper
337
+ 318: lacewing
338
+ 319: dragonfly
339
+ 320: damselfly
340
+ 321: red admiral
341
+ 322: ringlet
342
+ 323: monarch butterfly
343
+ 324: small white
344
+ 325: sulphur butterfly
345
+ 326: gossamer-winged butterfly
346
+ 327: starfish
347
+ 328: sea urchin
348
+ 329: sea cucumber
349
+ 330: cottontail rabbit
350
+ 331: hare
351
+ 332: Angora rabbit
352
+ 333: hamster
353
+ 334: porcupine
354
+ 335: fox squirrel
355
+ 336: marmot
356
+ 337: beaver
357
+ 338: guinea pig
358
+ 339: common sorrel
359
+ 340: zebra
360
+ 341: pig
361
+ 342: wild boar
362
+ 343: warthog
363
+ 344: hippopotamus
364
+ 345: ox
365
+ 346: water buffalo
366
+ 347: bison
367
+ 348: ram
368
+ 349: bighorn sheep
369
+ 350: Alpine ibex
370
+ 351: hartebeest
371
+ 352: impala
372
+ 353: gazelle
373
+ 354: dromedary
374
+ 355: llama
375
+ 356: weasel
376
+ 357: mink
377
+ 358: European polecat
378
+ 359: black-footed ferret
379
+ 360: otter
380
+ 361: skunk
381
+ 362: badger
382
+ 363: armadillo
383
+ 364: three-toed sloth
384
+ 365: orangutan
385
+ 366: gorilla
386
+ 367: chimpanzee
387
+ 368: gibbon
388
+ 369: siamang
389
+ 370: guenon
390
+ 371: patas monkey
391
+ 372: baboon
392
+ 373: macaque
393
+ 374: langur
394
+ 375: black-and-white colobus
395
+ 376: proboscis monkey
396
+ 377: marmoset
397
+ 378: white-headed capuchin
398
+ 379: howler monkey
399
+ 380: titi
400
+ 381: Geoffroy's spider monkey
401
+ 382: common squirrel monkey
402
+ 383: ring-tailed lemur
403
+ 384: indri
404
+ 385: Asian elephant
405
+ 386: African bush elephant
406
+ 387: red panda
407
+ 388: giant panda
408
+ 389: snoek
409
+ 390: eel
410
+ 391: coho salmon
411
+ 392: rock beauty
412
+ 393: clownfish
413
+ 394: sturgeon
414
+ 395: garfish
415
+ 396: lionfish
416
+ 397: pufferfish
417
+ 398: abacus
418
+ 399: abaya
419
+ 400: academic gown
420
+ 401: accordion
421
+ 402: acoustic guitar
422
+ 403: aircraft carrier
423
+ 404: airliner
424
+ 405: airship
425
+ 406: altar
426
+ 407: ambulance
427
+ 408: amphibious vehicle
428
+ 409: analog clock
429
+ 410: apiary
430
+ 411: apron
431
+ 412: waste container
432
+ 413: assault rifle
433
+ 414: backpack
434
+ 415: bakery
435
+ 416: balance beam
436
+ 417: balloon
437
+ 418: ballpoint pen
438
+ 419: Band-Aid
439
+ 420: banjo
440
+ 421: baluster
441
+ 422: barbell
442
+ 423: barber chair
443
+ 424: barbershop
444
+ 425: barn
445
+ 426: barometer
446
+ 427: barrel
447
+ 428: wheelbarrow
448
+ 429: baseball
449
+ 430: basketball
450
+ 431: bassinet
451
+ 432: bassoon
452
+ 433: swimming cap
453
+ 434: bath towel
454
+ 435: bathtub
455
+ 436: station wagon
456
+ 437: lighthouse
457
+ 438: beaker
458
+ 439: military cap
459
+ 440: beer bottle
460
+ 441: beer glass
461
+ 442: bell-cot
462
+ 443: bib
463
+ 444: tandem bicycle
464
+ 445: bikini
465
+ 446: ring binder
466
+ 447: binoculars
467
+ 448: birdhouse
468
+ 449: boathouse
469
+ 450: bobsleigh
470
+ 451: bolo tie
471
+ 452: poke bonnet
472
+ 453: bookcase
473
+ 454: bookstore
474
+ 455: bottle cap
475
+ 456: bow
476
+ 457: bow tie
477
+ 458: brass
478
+ 459: bra
479
+ 460: breakwater
480
+ 461: breastplate
481
+ 462: broom
482
+ 463: bucket
483
+ 464: buckle
484
+ 465: bulletproof vest
485
+ 466: high-speed train
486
+ 467: butcher shop
487
+ 468: taxicab
488
+ 469: cauldron
489
+ 470: candle
490
+ 471: cannon
491
+ 472: canoe
492
+ 473: can opener
493
+ 474: cardigan
494
+ 475: car mirror
495
+ 476: carousel
496
+ 477: tool kit
497
+ 478: carton
498
+ 479: car wheel
499
+ 480: automated teller machine
500
+ 481: cassette
501
+ 482: cassette player
502
+ 483: castle
503
+ 484: catamaran
504
+ 485: CD player
505
+ 486: cello
506
+ 487: mobile phone
507
+ 488: chain
508
+ 489: chain-link fence
509
+ 490: chain mail
510
+ 491: chainsaw
511
+ 492: chest
512
+ 493: chiffonier
513
+ 494: chime
514
+ 495: china cabinet
515
+ 496: Christmas stocking
516
+ 497: church
517
+ 498: movie theater
518
+ 499: cleaver
519
+ 500: cliff dwelling
520
+ 501: cloak
521
+ 502: clogs
522
+ 503: cocktail shaker
523
+ 504: coffee mug
524
+ 505: coffeemaker
525
+ 506: coil
526
+ 507: combination lock
527
+ 508: computer keyboard
528
+ 509: confectionery store
529
+ 510: container ship
530
+ 511: convertible
531
+ 512: corkscrew
532
+ 513: cornet
533
+ 514: cowboy boot
534
+ 515: cowboy hat
535
+ 516: cradle
536
+ 517: crane (machine)
537
+ 518: crash helmet
538
+ 519: crate
539
+ 520: infant bed
540
+ 521: Crock Pot
541
+ 522: croquet ball
542
+ 523: crutch
543
+ 524: cuirass
544
+ 525: dam
545
+ 526: desk
546
+ 527: desktop computer
547
+ 528: rotary dial telephone
548
+ 529: diaper
549
+ 530: digital clock
550
+ 531: digital watch
551
+ 532: dining table
552
+ 533: dishcloth
553
+ 534: dishwasher
554
+ 535: disc brake
555
+ 536: dock
556
+ 537: dog sled
557
+ 538: dome
558
+ 539: doormat
559
+ 540: drilling rig
560
+ 541: drum
561
+ 542: drumstick
562
+ 543: dumbbell
563
+ 544: Dutch oven
564
+ 545: electric fan
565
+ 546: electric guitar
566
+ 547: electric locomotive
567
+ 548: entertainment center
568
+ 549: envelope
569
+ 550: espresso machine
570
+ 551: face powder
571
+ 552: feather boa
572
+ 553: filing cabinet
573
+ 554: fireboat
574
+ 555: fire engine
575
+ 556: fire screen sheet
576
+ 557: flagpole
577
+ 558: flute
578
+ 559: folding chair
579
+ 560: football helmet
580
+ 561: forklift
581
+ 562: fountain
582
+ 563: fountain pen
583
+ 564: four-poster bed
584
+ 565: freight car
585
+ 566: French horn
586
+ 567: frying pan
587
+ 568: fur coat
588
+ 569: garbage truck
589
+ 570: gas mask
590
+ 571: gas pump
591
+ 572: goblet
592
+ 573: go-kart
593
+ 574: golf ball
594
+ 575: golf cart
595
+ 576: gondola
596
+ 577: gong
597
+ 578: gown
598
+ 579: grand piano
599
+ 580: greenhouse
600
+ 581: grille
601
+ 582: grocery store
602
+ 583: guillotine
603
+ 584: barrette
604
+ 585: hair spray
605
+ 586: half-track
606
+ 587: hammer
607
+ 588: hamper
608
+ 589: hair dryer
609
+ 590: hand-held computer
610
+ 591: handkerchief
611
+ 592: hard disk drive
612
+ 593: harmonica
613
+ 594: harp
614
+ 595: harvester
615
+ 596: hatchet
616
+ 597: holster
617
+ 598: home theater
618
+ 599: honeycomb
619
+ 600: hook
620
+ 601: hoop skirt
621
+ 602: horizontal bar
622
+ 603: horse-drawn vehicle
623
+ 604: hourglass
624
+ 605: iPod
625
+ 606: clothes iron
626
+ 607: jack-o'-lantern
627
+ 608: jeans
628
+ 609: jeep
629
+ 610: T-shirt
630
+ 611: jigsaw puzzle
631
+ 612: pulled rickshaw
632
+ 613: joystick
633
+ 614: kimono
634
+ 615: knee pad
635
+ 616: knot
636
+ 617: lab coat
637
+ 618: ladle
638
+ 619: lampshade
639
+ 620: laptop computer
640
+ 621: lawn mower
641
+ 622: lens cap
642
+ 623: paper knife
643
+ 624: library
644
+ 625: lifeboat
645
+ 626: lighter
646
+ 627: limousine
647
+ 628: ocean liner
648
+ 629: lipstick
649
+ 630: slip-on shoe
650
+ 631: lotion
651
+ 632: speaker
652
+ 633: loupe
653
+ 634: sawmill
654
+ 635: magnetic compass
655
+ 636: mail bag
656
+ 637: mailbox
657
+ 638: tights
658
+ 639: tank suit
659
+ 640: manhole cover
660
+ 641: maraca
661
+ 642: marimba
662
+ 643: mask
663
+ 644: match
664
+ 645: maypole
665
+ 646: maze
666
+ 647: measuring cup
667
+ 648: medicine chest
668
+ 649: megalith
669
+ 650: microphone
670
+ 651: microwave oven
671
+ 652: military uniform
672
+ 653: milk can
673
+ 654: minibus
674
+ 655: miniskirt
675
+ 656: minivan
676
+ 657: missile
677
+ 658: mitten
678
+ 659: mixing bowl
679
+ 660: mobile home
680
+ 661: Model T
681
+ 662: modem
682
+ 663: monastery
683
+ 664: monitor
684
+ 665: moped
685
+ 666: mortar
686
+ 667: square academic cap
687
+ 668: mosque
688
+ 669: mosquito net
689
+ 670: scooter
690
+ 671: mountain bike
691
+ 672: tent
692
+ 673: computer mouse
693
+ 674: mousetrap
694
+ 675: moving van
695
+ 676: muzzle
696
+ 677: nail
697
+ 678: neck brace
698
+ 679: necklace
699
+ 680: nipple
700
+ 681: notebook computer
701
+ 682: obelisk
702
+ 683: oboe
703
+ 684: ocarina
704
+ 685: odometer
705
+ 686: oil filter
706
+ 687: organ
707
+ 688: oscilloscope
708
+ 689: overskirt
709
+ 690: bullock cart
710
+ 691: oxygen mask
711
+ 692: packet
712
+ 693: paddle
713
+ 694: paddle wheel
714
+ 695: padlock
715
+ 696: paintbrush
716
+ 697: pajamas
717
+ 698: palace
718
+ 699: pan flute
719
+ 700: paper towel
720
+ 701: parachute
721
+ 702: parallel bars
722
+ 703: park bench
723
+ 704: parking meter
724
+ 705: passenger car
725
+ 706: patio
726
+ 707: payphone
727
+ 708: pedestal
728
+ 709: pencil case
729
+ 710: pencil sharpener
730
+ 711: perfume
731
+ 712: Petri dish
732
+ 713: photocopier
733
+ 714: plectrum
734
+ 715: Pickelhaube
735
+ 716: picket fence
736
+ 717: pickup truck
737
+ 718: pier
738
+ 719: piggy bank
739
+ 720: pill bottle
740
+ 721: pillow
741
+ 722: ping-pong ball
742
+ 723: pinwheel
743
+ 724: pirate ship
744
+ 725: pitcher
745
+ 726: hand plane
746
+ 727: planetarium
747
+ 728: plastic bag
748
+ 729: plate rack
749
+ 730: plow
750
+ 731: plunger
751
+ 732: Polaroid camera
752
+ 733: pole
753
+ 734: police van
754
+ 735: poncho
755
+ 736: billiard table
756
+ 737: soda bottle
757
+ 738: pot
758
+ 739: potter's wheel
759
+ 740: power drill
760
+ 741: prayer rug
761
+ 742: printer
762
+ 743: prison
763
+ 744: projectile
764
+ 745: projector
765
+ 746: hockey puck
766
+ 747: punching bag
767
+ 748: purse
768
+ 749: quill
769
+ 750: quilt
770
+ 751: race car
771
+ 752: racket
772
+ 753: radiator
773
+ 754: radio
774
+ 755: radio telescope
775
+ 756: rain barrel
776
+ 757: recreational vehicle
777
+ 758: reel
778
+ 759: reflex camera
779
+ 760: refrigerator
780
+ 761: remote control
781
+ 762: restaurant
782
+ 763: revolver
783
+ 764: rifle
784
+ 765: rocking chair
785
+ 766: rotisserie
786
+ 767: eraser
787
+ 768: rugby ball
788
+ 769: ruler
789
+ 770: running shoe
790
+ 771: safe
791
+ 772: safety pin
792
+ 773: salt shaker
793
+ 774: sandal
794
+ 775: sarong
795
+ 776: saxophone
796
+ 777: scabbard
797
+ 778: weighing scale
798
+ 779: school bus
799
+ 780: schooner
800
+ 781: scoreboard
801
+ 782: CRT screen
802
+ 783: screw
803
+ 784: screwdriver
804
+ 785: seat belt
805
+ 786: sewing machine
806
+ 787: shield
807
+ 788: shoe store
808
+ 789: shoji
809
+ 790: shopping basket
810
+ 791: shopping cart
811
+ 792: shovel
812
+ 793: shower cap
813
+ 794: shower curtain
814
+ 795: ski
815
+ 796: ski mask
816
+ 797: sleeping bag
817
+ 798: slide rule
818
+ 799: sliding door
819
+ 800: slot machine
820
+ 801: snorkel
821
+ 802: snowmobile
822
+ 803: snowplow
823
+ 804: soap dispenser
824
+ 805: soccer ball
825
+ 806: sock
826
+ 807: solar thermal collector
827
+ 808: sombrero
828
+ 809: soup bowl
829
+ 810: space bar
830
+ 811: space heater
831
+ 812: space shuttle
832
+ 813: spatula
833
+ 814: motorboat
834
+ 815: spider web
835
+ 816: spindle
836
+ 817: sports car
837
+ 818: spotlight
838
+ 819: stage
839
+ 820: steam locomotive
840
+ 821: through arch bridge
841
+ 822: steel drum
842
+ 823: stethoscope
843
+ 824: scarf
844
+ 825: stone wall
845
+ 826: stopwatch
846
+ 827: stove
847
+ 828: strainer
848
+ 829: tram
849
+ 830: stretcher
850
+ 831: couch
851
+ 832: stupa
852
+ 833: submarine
853
+ 834: suit
854
+ 835: sundial
855
+ 836: sunglass
856
+ 837: sunglasses
857
+ 838: sunscreen
858
+ 839: suspension bridge
859
+ 840: mop
860
+ 841: sweatshirt
861
+ 842: swimsuit
862
+ 843: swing
863
+ 844: switch
864
+ 845: syringe
865
+ 846: table lamp
866
+ 847: tank
867
+ 848: tape player
868
+ 849: teapot
869
+ 850: teddy bear
870
+ 851: television
871
+ 852: tennis ball
872
+ 853: thatched roof
873
+ 854: front curtain
874
+ 855: thimble
875
+ 856: threshing machine
876
+ 857: throne
877
+ 858: tile roof
878
+ 859: toaster
879
+ 860: tobacco shop
880
+ 861: toilet seat
881
+ 862: torch
882
+ 863: totem pole
883
+ 864: tow truck
884
+ 865: toy store
885
+ 866: tractor
886
+ 867: semi-trailer truck
887
+ 868: tray
888
+ 869: trench coat
889
+ 870: tricycle
890
+ 871: trimaran
891
+ 872: tripod
892
+ 873: triumphal arch
893
+ 874: trolleybus
894
+ 875: trombone
895
+ 876: tub
896
+ 877: turnstile
897
+ 878: typewriter keyboard
898
+ 879: umbrella
899
+ 880: unicycle
900
+ 881: upright piano
901
+ 882: vacuum cleaner
902
+ 883: vase
903
+ 884: vault
904
+ 885: velvet
905
+ 886: vending machine
906
+ 887: vestment
907
+ 888: viaduct
908
+ 889: violin
909
+ 890: volleyball
910
+ 891: waffle iron
911
+ 892: wall clock
912
+ 893: wallet
913
+ 894: wardrobe
914
+ 895: military aircraft
915
+ 896: sink
916
+ 897: washing machine
917
+ 898: water bottle
918
+ 899: water jug
919
+ 900: water tower
920
+ 901: whiskey jug
921
+ 902: whistle
922
+ 903: wig
923
+ 904: window screen
924
+ 905: window shade
925
+ 906: Windsor tie
926
+ 907: wine bottle
927
+ 908: wing
928
+ 909: wok
929
+ 910: wooden spoon
930
+ 911: wool
931
+ 912: split-rail fence
932
+ 913: shipwreck
933
+ 914: yawl
934
+ 915: yurt
935
+ 916: website
936
+ 917: comic book
937
+ 918: crossword
938
+ 919: traffic sign
939
+ 920: traffic light
940
+ 921: dust jacket
941
+ 922: menu
942
+ 923: plate
943
+ 924: guacamole
944
+ 925: consomme
945
+ 926: hot pot
946
+ 927: trifle
947
+ 928: ice cream
948
+ 929: ice pop
949
+ 930: baguette
950
+ 931: bagel
951
+ 932: pretzel
952
+ 933: cheeseburger
953
+ 934: hot dog
954
+ 935: mashed potato
955
+ 936: cabbage
956
+ 937: broccoli
957
+ 938: cauliflower
958
+ 939: zucchini
959
+ 940: spaghetti squash
960
+ 941: acorn squash
961
+ 942: butternut squash
962
+ 943: cucumber
963
+ 944: artichoke
964
+ 945: bell pepper
965
+ 946: cardoon
966
+ 947: mushroom
967
+ 948: Granny Smith
968
+ 949: strawberry
969
+ 950: orange
970
+ 951: lemon
971
+ 952: fig
972
+ 953: pineapple
973
+ 954: banana
974
+ 955: jackfruit
975
+ 956: custard apple
976
+ 957: pomegranate
977
+ 958: hay
978
+ 959: carbonara
979
+ 960: chocolate syrup
980
+ 961: dough
981
+ 962: meatloaf
982
+ 963: pizza
983
+ 964: pot pie
984
+ 965: burrito
985
+ 966: red wine
986
+ 967: espresso
987
+ 968: cup
988
+ 969: eggnog
989
+ 970: alp
990
+ 971: bubble
991
+ 972: cliff
992
+ 973: coral reef
993
+ 974: geyser
994
+ 975: lakeshore
995
+ 976: promontory
996
+ 977: shoal
997
+ 978: seashore
998
+ 979: valley
999
+ 980: volcano
1000
+ 981: baseball player
1001
+ 982: bridegroom
1002
+ 983: scuba diver
1003
+ 984: rapeseed
1004
+ 985: daisy
1005
+ 986: yellow lady's slipper
1006
+ 987: corn
1007
+ 988: acorn
1008
+ 989: rose hip
1009
+ 990: horse chestnut seed
1010
+ 991: coral fungus
1011
+ 992: agaric
1012
+ 993: gyromitra
1013
+ 994: stinkhorn mushroom
1014
+ 995: earth star
1015
+ 996: hen-of-the-woods
1016
+ 997: bolete
1017
+ 998: ear
1018
+ 999: toilet paper
1019
+
1020
+ # Imagenet class codes to human-readable names
1021
+ map:
1022
+ n01440764: tench
1023
+ n01443537: goldfish
1024
+ n01484850: great_white_shark
1025
+ n01491361: tiger_shark
1026
+ n01494475: hammerhead
1027
+ n01496331: electric_ray
1028
+ n01498041: stingray
1029
+ n01514668: cock
1030
+ n01514859: hen
1031
+ n01518878: ostrich
1032
+ n01530575: brambling
1033
+ n01531178: goldfinch
1034
+ n01532829: house_finch
1035
+ n01534433: junco
1036
+ n01537544: indigo_bunting
1037
+ n01558993: robin
1038
+ n01560419: bulbul
1039
+ n01580077: jay
1040
+ n01582220: magpie
1041
+ n01592084: chickadee
1042
+ n01601694: water_ouzel
1043
+ n01608432: kite
1044
+ n01614925: bald_eagle
1045
+ n01616318: vulture
1046
+ n01622779: great_grey_owl
1047
+ n01629819: European_fire_salamander
1048
+ n01630670: common_newt
1049
+ n01631663: eft
1050
+ n01632458: spotted_salamander
1051
+ n01632777: axolotl
1052
+ n01641577: bullfrog
1053
+ n01644373: tree_frog
1054
+ n01644900: tailed_frog
1055
+ n01664065: loggerhead
1056
+ n01665541: leatherback_turtle
1057
+ n01667114: mud_turtle
1058
+ n01667778: terrapin
1059
+ n01669191: box_turtle
1060
+ n01675722: banded_gecko
1061
+ n01677366: common_iguana
1062
+ n01682714: American_chameleon
1063
+ n01685808: whiptail
1064
+ n01687978: agama
1065
+ n01688243: frilled_lizard
1066
+ n01689811: alligator_lizard
1067
+ n01692333: Gila_monster
1068
+ n01693334: green_lizard
1069
+ n01694178: African_chameleon
1070
+ n01695060: Komodo_dragon
1071
+ n01697457: African_crocodile
1072
+ n01698640: American_alligator
1073
+ n01704323: triceratops
1074
+ n01728572: thunder_snake
1075
+ n01728920: ringneck_snake
1076
+ n01729322: hognose_snake
1077
+ n01729977: green_snake
1078
+ n01734418: king_snake
1079
+ n01735189: garter_snake
1080
+ n01737021: water_snake
1081
+ n01739381: vine_snake
1082
+ n01740131: night_snake
1083
+ n01742172: boa_constrictor
1084
+ n01744401: rock_python
1085
+ n01748264: Indian_cobra
1086
+ n01749939: green_mamba
1087
+ n01751748: sea_snake
1088
+ n01753488: horned_viper
1089
+ n01755581: diamondback
1090
+ n01756291: sidewinder
1091
+ n01768244: trilobite
1092
+ n01770081: harvestman
1093
+ n01770393: scorpion
1094
+ n01773157: black_and_gold_garden_spider
1095
+ n01773549: barn_spider
1096
+ n01773797: garden_spider
1097
+ n01774384: black_widow
1098
+ n01774750: tarantula
1099
+ n01775062: wolf_spider
1100
+ n01776313: tick
1101
+ n01784675: centipede
1102
+ n01795545: black_grouse
1103
+ n01796340: ptarmigan
1104
+ n01797886: ruffed_grouse
1105
+ n01798484: prairie_chicken
1106
+ n01806143: peacock
1107
+ n01806567: quail
1108
+ n01807496: partridge
1109
+ n01817953: African_grey
1110
+ n01818515: macaw
1111
+ n01819313: sulphur-crested_cockatoo
1112
+ n01820546: lorikeet
1113
+ n01824575: coucal
1114
+ n01828970: bee_eater
1115
+ n01829413: hornbill
1116
+ n01833805: hummingbird
1117
+ n01843065: jacamar
1118
+ n01843383: toucan
1119
+ n01847000: drake
1120
+ n01855032: red-breasted_merganser
1121
+ n01855672: goose
1122
+ n01860187: black_swan
1123
+ n01871265: tusker
1124
+ n01872401: echidna
1125
+ n01873310: platypus
1126
+ n01877812: wallaby
1127
+ n01882714: koala
1128
+ n01883070: wombat
1129
+ n01910747: jellyfish
1130
+ n01914609: sea_anemone
1131
+ n01917289: brain_coral
1132
+ n01924916: flatworm
1133
+ n01930112: nematode
1134
+ n01943899: conch
1135
+ n01944390: snail
1136
+ n01945685: slug
1137
+ n01950731: sea_slug
1138
+ n01955084: chiton
1139
+ n01968897: chambered_nautilus
1140
+ n01978287: Dungeness_crab
1141
+ n01978455: rock_crab
1142
+ n01980166: fiddler_crab
1143
+ n01981276: king_crab
1144
+ n01983481: American_lobster
1145
+ n01984695: spiny_lobster
1146
+ n01985128: crayfish
1147
+ n01986214: hermit_crab
1148
+ n01990800: isopod
1149
+ n02002556: white_stork
1150
+ n02002724: black_stork
1151
+ n02006656: spoonbill
1152
+ n02007558: flamingo
1153
+ n02009229: little_blue_heron
1154
+ n02009912: American_egret
1155
+ n02011460: bittern
1156
+ n02012849: crane_(bird)
1157
+ n02013706: limpkin
1158
+ n02017213: European_gallinule
1159
+ n02018207: American_coot
1160
+ n02018795: bustard
1161
+ n02025239: ruddy_turnstone
1162
+ n02027492: red-backed_sandpiper
1163
+ n02028035: redshank
1164
+ n02033041: dowitcher
1165
+ n02037110: oystercatcher
1166
+ n02051845: pelican
1167
+ n02056570: king_penguin
1168
+ n02058221: albatross
1169
+ n02066245: grey_whale
1170
+ n02071294: killer_whale
1171
+ n02074367: dugong
1172
+ n02077923: sea_lion
1173
+ n02085620: Chihuahua
1174
+ n02085782: Japanese_spaniel
1175
+ n02085936: Maltese_dog
1176
+ n02086079: Pekinese
1177
+ n02086240: Shih-Tzu
1178
+ n02086646: Blenheim_spaniel
1179
+ n02086910: papillon
1180
+ n02087046: toy_terrier
1181
+ n02087394: Rhodesian_ridgeback
1182
+ n02088094: Afghan_hound
1183
+ n02088238: basset
1184
+ n02088364: beagle
1185
+ n02088466: bloodhound
1186
+ n02088632: bluetick
1187
+ n02089078: black-and-tan_coonhound
1188
+ n02089867: Walker_hound
1189
+ n02089973: English_foxhound
1190
+ n02090379: redbone
1191
+ n02090622: borzoi
1192
+ n02090721: Irish_wolfhound
1193
+ n02091032: Italian_greyhound
1194
+ n02091134: whippet
1195
+ n02091244: Ibizan_hound
1196
+ n02091467: Norwegian_elkhound
1197
+ n02091635: otterhound
1198
+ n02091831: Saluki
1199
+ n02092002: Scottish_deerhound
1200
+ n02092339: Weimaraner
1201
+ n02093256: Staffordshire_bullterrier
1202
+ n02093428: American_Staffordshire_terrier
1203
+ n02093647: Bedlington_terrier
1204
+ n02093754: Border_terrier
1205
+ n02093859: Kerry_blue_terrier
1206
+ n02093991: Irish_terrier
1207
+ n02094114: Norfolk_terrier
1208
+ n02094258: Norwich_terrier
1209
+ n02094433: Yorkshire_terrier
1210
+ n02095314: wire-haired_fox_terrier
1211
+ n02095570: Lakeland_terrier
1212
+ n02095889: Sealyham_terrier
1213
+ n02096051: Airedale
1214
+ n02096177: cairn
1215
+ n02096294: Australian_terrier
1216
+ n02096437: Dandie_Dinmont
1217
+ n02096585: Boston_bull
1218
+ n02097047: miniature_schnauzer
1219
+ n02097130: giant_schnauzer
1220
+ n02097209: standard_schnauzer
1221
+ n02097298: Scotch_terrier
1222
+ n02097474: Tibetan_terrier
1223
+ n02097658: silky_terrier
1224
+ n02098105: soft-coated_wheaten_terrier
1225
+ n02098286: West_Highland_white_terrier
1226
+ n02098413: Lhasa
1227
+ n02099267: flat-coated_retriever
1228
+ n02099429: curly-coated_retriever
1229
+ n02099601: golden_retriever
1230
+ n02099712: Labrador_retriever
1231
+ n02099849: Chesapeake_Bay_retriever
1232
+ n02100236: German_short-haired_pointer
1233
+ n02100583: vizsla
1234
+ n02100735: English_setter
1235
+ n02100877: Irish_setter
1236
+ n02101006: Gordon_setter
1237
+ n02101388: Brittany_spaniel
1238
+ n02101556: clumber
1239
+ n02102040: English_springer
1240
+ n02102177: Welsh_springer_spaniel
1241
+ n02102318: cocker_spaniel
1242
+ n02102480: Sussex_spaniel
1243
+ n02102973: Irish_water_spaniel
1244
+ n02104029: kuvasz
1245
+ n02104365: schipperke
1246
+ n02105056: groenendael
1247
+ n02105162: malinois
1248
+ n02105251: briard
1249
+ n02105412: kelpie
1250
+ n02105505: komondor
1251
+ n02105641: Old_English_sheepdog
1252
+ n02105855: Shetland_sheepdog
1253
+ n02106030: collie
1254
+ n02106166: Border_collie
1255
+ n02106382: Bouvier_des_Flandres
1256
+ n02106550: Rottweiler
1257
+ n02106662: German_shepherd
1258
+ n02107142: Doberman
1259
+ n02107312: miniature_pinscher
1260
+ n02107574: Greater_Swiss_Mountain_dog
1261
+ n02107683: Bernese_mountain_dog
1262
+ n02107908: Appenzeller
1263
+ n02108000: EntleBucher
1264
+ n02108089: boxer
1265
+ n02108422: bull_mastiff
1266
+ n02108551: Tibetan_mastiff
1267
+ n02108915: French_bulldog
1268
+ n02109047: Great_Dane
1269
+ n02109525: Saint_Bernard
1270
+ n02109961: Eskimo_dog
1271
+ n02110063: malamute
1272
+ n02110185: Siberian_husky
1273
+ n02110341: dalmatian
1274
+ n02110627: affenpinscher
1275
+ n02110806: basenji
1276
+ n02110958: pug
1277
+ n02111129: Leonberg
1278
+ n02111277: Newfoundland
1279
+ n02111500: Great_Pyrenees
1280
+ n02111889: Samoyed
1281
+ n02112018: Pomeranian
1282
+ n02112137: chow
1283
+ n02112350: keeshond
1284
+ n02112706: Brabancon_griffon
1285
+ n02113023: Pembroke
1286
+ n02113186: Cardigan
1287
+ n02113624: toy_poodle
1288
+ n02113712: miniature_poodle
1289
+ n02113799: standard_poodle
1290
+ n02113978: Mexican_hairless
1291
+ n02114367: timber_wolf
1292
+ n02114548: white_wolf
1293
+ n02114712: red_wolf
1294
+ n02114855: coyote
1295
+ n02115641: dingo
1296
+ n02115913: dhole
1297
+ n02116738: African_hunting_dog
1298
+ n02117135: hyena
1299
+ n02119022: red_fox
1300
+ n02119789: kit_fox
1301
+ n02120079: Arctic_fox
1302
+ n02120505: grey_fox
1303
+ n02123045: tabby
1304
+ n02123159: tiger_cat
1305
+ n02123394: Persian_cat
1306
+ n02123597: Siamese_cat
1307
+ n02124075: Egyptian_cat
1308
+ n02125311: cougar
1309
+ n02127052: lynx
1310
+ n02128385: leopard
1311
+ n02128757: snow_leopard
1312
+ n02128925: jaguar
1313
+ n02129165: lion
1314
+ n02129604: tiger
1315
+ n02130308: cheetah
1316
+ n02132136: brown_bear
1317
+ n02133161: American_black_bear
1318
+ n02134084: ice_bear
1319
+ n02134418: sloth_bear
1320
+ n02137549: mongoose
1321
+ n02138441: meerkat
1322
+ n02165105: tiger_beetle
1323
+ n02165456: ladybug
1324
+ n02167151: ground_beetle
1325
+ n02168699: long-horned_beetle
1326
+ n02169497: leaf_beetle
1327
+ n02172182: dung_beetle
1328
+ n02174001: rhinoceros_beetle
1329
+ n02177972: weevil
1330
+ n02190166: fly
1331
+ n02206856: bee
1332
+ n02219486: ant
1333
+ n02226429: grasshopper
1334
+ n02229544: cricket
1335
+ n02231487: walking_stick
1336
+ n02233338: cockroach
1337
+ n02236044: mantis
1338
+ n02256656: cicada
1339
+ n02259212: leafhopper
1340
+ n02264363: lacewing
1341
+ n02268443: dragonfly
1342
+ n02268853: damselfly
1343
+ n02276258: admiral
1344
+ n02277742: ringlet
1345
+ n02279972: monarch
1346
+ n02280649: cabbage_butterfly
1347
+ n02281406: sulphur_butterfly
1348
+ n02281787: lycaenid
1349
+ n02317335: starfish
1350
+ n02319095: sea_urchin
1351
+ n02321529: sea_cucumber
1352
+ n02325366: wood_rabbit
1353
+ n02326432: hare
1354
+ n02328150: Angora
1355
+ n02342885: hamster
1356
+ n02346627: porcupine
1357
+ n02356798: fox_squirrel
1358
+ n02361337: marmot
1359
+ n02363005: beaver
1360
+ n02364673: guinea_pig
1361
+ n02389026: sorrel
1362
+ n02391049: zebra
1363
+ n02395406: hog
1364
+ n02396427: wild_boar
1365
+ n02397096: warthog
1366
+ n02398521: hippopotamus
1367
+ n02403003: ox
1368
+ n02408429: water_buffalo
1369
+ n02410509: bison
1370
+ n02412080: ram
1371
+ n02415577: bighorn
1372
+ n02417914: ibex
1373
+ n02422106: hartebeest
1374
+ n02422699: impala
1375
+ n02423022: gazelle
1376
+ n02437312: Arabian_camel
1377
+ n02437616: llama
1378
+ n02441942: weasel
1379
+ n02442845: mink
1380
+ n02443114: polecat
1381
+ n02443484: black-footed_ferret
1382
+ n02444819: otter
1383
+ n02445715: skunk
1384
+ n02447366: badger
1385
+ n02454379: armadillo
1386
+ n02457408: three-toed_sloth
1387
+ n02480495: orangutan
1388
+ n02480855: gorilla
1389
+ n02481823: chimpanzee
1390
+ n02483362: gibbon
1391
+ n02483708: siamang
1392
+ n02484975: guenon
1393
+ n02486261: patas
1394
+ n02486410: baboon
1395
+ n02487347: macaque
1396
+ n02488291: langur
1397
+ n02488702: colobus
1398
+ n02489166: proboscis_monkey
1399
+ n02490219: marmoset
1400
+ n02492035: capuchin
1401
+ n02492660: howler_monkey
1402
+ n02493509: titi
1403
+ n02493793: spider_monkey
1404
+ n02494079: squirrel_monkey
1405
+ n02497673: Madagascar_cat
1406
+ n02500267: indri
1407
+ n02504013: Indian_elephant
1408
+ n02504458: African_elephant
1409
+ n02509815: lesser_panda
1410
+ n02510455: giant_panda
1411
+ n02514041: barracouta
1412
+ n02526121: eel
1413
+ n02536864: coho
1414
+ n02606052: rock_beauty
1415
+ n02607072: anemone_fish
1416
+ n02640242: sturgeon
1417
+ n02641379: gar
1418
+ n02643566: lionfish
1419
+ n02655020: puffer
1420
+ n02666196: abacus
1421
+ n02667093: abaya
1422
+ n02669723: academic_gown
1423
+ n02672831: accordion
1424
+ n02676566: acoustic_guitar
1425
+ n02687172: aircraft_carrier
1426
+ n02690373: airliner
1427
+ n02692877: airship
1428
+ n02699494: altar
1429
+ n02701002: ambulance
1430
+ n02704792: amphibian
1431
+ n02708093: analog_clock
1432
+ n02727426: apiary
1433
+ n02730930: apron
1434
+ n02747177: ashcan
1435
+ n02749479: assault_rifle
1436
+ n02769748: backpack
1437
+ n02776631: bakery
1438
+ n02777292: balance_beam
1439
+ n02782093: balloon
1440
+ n02783161: ballpoint
1441
+ n02786058: Band_Aid
1442
+ n02787622: banjo
1443
+ n02788148: bannister
1444
+ n02790996: barbell
1445
+ n02791124: barber_chair
1446
+ n02791270: barbershop
1447
+ n02793495: barn
1448
+ n02794156: barometer
1449
+ n02795169: barrel
1450
+ n02797295: barrow
1451
+ n02799071: baseball
1452
+ n02802426: basketball
1453
+ n02804414: bassinet
1454
+ n02804610: bassoon
1455
+ n02807133: bathing_cap
1456
+ n02808304: bath_towel
1457
+ n02808440: bathtub
1458
+ n02814533: beach_wagon
1459
+ n02814860: beacon
1460
+ n02815834: beaker
1461
+ n02817516: bearskin
1462
+ n02823428: beer_bottle
1463
+ n02823750: beer_glass
1464
+ n02825657: bell_cote
1465
+ n02834397: bib
1466
+ n02835271: bicycle-built-for-two
1467
+ n02837789: bikini
1468
+ n02840245: binder
1469
+ n02841315: binoculars
1470
+ n02843684: birdhouse
1471
+ n02859443: boathouse
1472
+ n02860847: bobsled
1473
+ n02865351: bolo_tie
1474
+ n02869837: bonnet
1475
+ n02870880: bookcase
1476
+ n02871525: bookshop
1477
+ n02877765: bottlecap
1478
+ n02879718: bow
1479
+ n02883205: bow_tie
1480
+ n02892201: brass
1481
+ n02892767: brassiere
1482
+ n02894605: breakwater
1483
+ n02895154: breastplate
1484
+ n02906734: broom
1485
+ n02909870: bucket
1486
+ n02910353: buckle
1487
+ n02916936: bulletproof_vest
1488
+ n02917067: bullet_train
1489
+ n02927161: butcher_shop
1490
+ n02930766: cab
1491
+ n02939185: caldron
1492
+ n02948072: candle
1493
+ n02950826: cannon
1494
+ n02951358: canoe
1495
+ n02951585: can_opener
1496
+ n02963159: cardigan
1497
+ n02965783: car_mirror
1498
+ n02966193: carousel
1499
+ n02966687: carpenter's_kit
1500
+ n02971356: carton
1501
+ n02974003: car_wheel
1502
+ n02977058: cash_machine
1503
+ n02978881: cassette
1504
+ n02979186: cassette_player
1505
+ n02980441: castle
1506
+ n02981792: catamaran
1507
+ n02988304: CD_player
1508
+ n02992211: cello
1509
+ n02992529: cellular_telephone
1510
+ n02999410: chain
1511
+ n03000134: chainlink_fence
1512
+ n03000247: chain_mail
1513
+ n03000684: chain_saw
1514
+ n03014705: chest
1515
+ n03016953: chiffonier
1516
+ n03017168: chime
1517
+ n03018349: china_cabinet
1518
+ n03026506: Christmas_stocking
1519
+ n03028079: church
1520
+ n03032252: cinema
1521
+ n03041632: cleaver
1522
+ n03042490: cliff_dwelling
1523
+ n03045698: cloak
1524
+ n03047690: clog
1525
+ n03062245: cocktail_shaker
1526
+ n03063599: coffee_mug
1527
+ n03063689: coffeepot
1528
+ n03065424: coil
1529
+ n03075370: combination_lock
1530
+ n03085013: computer_keyboard
1531
+ n03089624: confectionery
1532
+ n03095699: container_ship
1533
+ n03100240: convertible
1534
+ n03109150: corkscrew
1535
+ n03110669: cornet
1536
+ n03124043: cowboy_boot
1537
+ n03124170: cowboy_hat
1538
+ n03125729: cradle
1539
+ n03126707: crane_(machine)
1540
+ n03127747: crash_helmet
1541
+ n03127925: crate
1542
+ n03131574: crib
1543
+ n03133878: Crock_Pot
1544
+ n03134739: croquet_ball
1545
+ n03141823: crutch
1546
+ n03146219: cuirass
1547
+ n03160309: dam
1548
+ n03179701: desk
1549
+ n03180011: desktop_computer
1550
+ n03187595: dial_telephone
1551
+ n03188531: diaper
1552
+ n03196217: digital_clock
1553
+ n03197337: digital_watch
1554
+ n03201208: dining_table
1555
+ n03207743: dishrag
1556
+ n03207941: dishwasher
1557
+ n03208938: disk_brake
1558
+ n03216828: dock
1559
+ n03218198: dogsled
1560
+ n03220513: dome
1561
+ n03223299: doormat
1562
+ n03240683: drilling_platform
1563
+ n03249569: drum
1564
+ n03250847: drumstick
1565
+ n03255030: dumbbell
1566
+ n03259280: Dutch_oven
1567
+ n03271574: electric_fan
1568
+ n03272010: electric_guitar
1569
+ n03272562: electric_locomotive
1570
+ n03290653: entertainment_center
1571
+ n03291819: envelope
1572
+ n03297495: espresso_maker
1573
+ n03314780: face_powder
1574
+ n03325584: feather_boa
1575
+ n03337140: file
1576
+ n03344393: fireboat
1577
+ n03345487: fire_engine
1578
+ n03347037: fire_screen
1579
+ n03355925: flagpole
1580
+ n03372029: flute
1581
+ n03376595: folding_chair
1582
+ n03379051: football_helmet
1583
+ n03384352: forklift
1584
+ n03388043: fountain
1585
+ n03388183: fountain_pen
1586
+ n03388549: four-poster
1587
+ n03393912: freight_car
1588
+ n03394916: French_horn
1589
+ n03400231: frying_pan
1590
+ n03404251: fur_coat
1591
+ n03417042: garbage_truck
1592
+ n03424325: gasmask
1593
+ n03425413: gas_pump
1594
+ n03443371: goblet
1595
+ n03444034: go-kart
1596
+ n03445777: golf_ball
1597
+ n03445924: golfcart
1598
+ n03447447: gondola
1599
+ n03447721: gong
1600
+ n03450230: gown
1601
+ n03452741: grand_piano
1602
+ n03457902: greenhouse
1603
+ n03459775: grille
1604
+ n03461385: grocery_store
1605
+ n03467068: guillotine
1606
+ n03476684: hair_slide
1607
+ n03476991: hair_spray
1608
+ n03478589: half_track
1609
+ n03481172: hammer
1610
+ n03482405: hamper
1611
+ n03483316: hand_blower
1612
+ n03485407: hand-held_computer
1613
+ n03485794: handkerchief
1614
+ n03492542: hard_disc
1615
+ n03494278: harmonica
1616
+ n03495258: harp
1617
+ n03496892: harvester
1618
+ n03498962: hatchet
1619
+ n03527444: holster
1620
+ n03529860: home_theater
1621
+ n03530642: honeycomb
1622
+ n03532672: hook
1623
+ n03534580: hoopskirt
1624
+ n03535780: horizontal_bar
1625
+ n03538406: horse_cart
1626
+ n03544143: hourglass
1627
+ n03584254: iPod
1628
+ n03584829: iron
1629
+ n03590841: jack-o'-lantern
1630
+ n03594734: jean
1631
+ n03594945: jeep
1632
+ n03595614: jersey
1633
+ n03598930: jigsaw_puzzle
1634
+ n03599486: jinrikisha
1635
+ n03602883: joystick
1636
+ n03617480: kimono
1637
+ n03623198: knee_pad
1638
+ n03627232: knot
1639
+ n03630383: lab_coat
1640
+ n03633091: ladle
1641
+ n03637318: lampshade
1642
+ n03642806: laptop
1643
+ n03649909: lawn_mower
1644
+ n03657121: lens_cap
1645
+ n03658185: letter_opener
1646
+ n03661043: library
1647
+ n03662601: lifeboat
1648
+ n03666591: lighter
1649
+ n03670208: limousine
1650
+ n03673027: liner
1651
+ n03676483: lipstick
1652
+ n03680355: Loafer
1653
+ n03690938: lotion
1654
+ n03691459: loudspeaker
1655
+ n03692522: loupe
1656
+ n03697007: lumbermill
1657
+ n03706229: magnetic_compass
1658
+ n03709823: mailbag
1659
+ n03710193: mailbox
1660
+ n03710637: maillot_(tights)
1661
+ n03710721: maillot_(tank_suit)
1662
+ n03717622: manhole_cover
1663
+ n03720891: maraca
1664
+ n03721384: marimba
1665
+ n03724870: mask
1666
+ n03729826: matchstick
1667
+ n03733131: maypole
1668
+ n03733281: maze
1669
+ n03733805: measuring_cup
1670
+ n03742115: medicine_chest
1671
+ n03743016: megalith
1672
+ n03759954: microphone
1673
+ n03761084: microwave
1674
+ n03763968: military_uniform
1675
+ n03764736: milk_can
1676
+ n03769881: minibus
1677
+ n03770439: miniskirt
1678
+ n03770679: minivan
1679
+ n03773504: missile
1680
+ n03775071: mitten
1681
+ n03775546: mixing_bowl
1682
+ n03776460: mobile_home
1683
+ n03777568: Model_T
1684
+ n03777754: modem
1685
+ n03781244: monastery
1686
+ n03782006: monitor
1687
+ n03785016: moped
1688
+ n03786901: mortar
1689
+ n03787032: mortarboard
1690
+ n03788195: mosque
1691
+ n03788365: mosquito_net
1692
+ n03791053: motor_scooter
1693
+ n03792782: mountain_bike
1694
+ n03792972: mountain_tent
1695
+ n03793489: mouse
1696
+ n03794056: mousetrap
1697
+ n03796401: moving_van
1698
+ n03803284: muzzle
1699
+ n03804744: nail
1700
+ n03814639: neck_brace
1701
+ n03814906: necklace
1702
+ n03825788: nipple
1703
+ n03832673: notebook
1704
+ n03837869: obelisk
1705
+ n03838899: oboe
1706
+ n03840681: ocarina
1707
+ n03841143: odometer
1708
+ n03843555: oil_filter
1709
+ n03854065: organ
1710
+ n03857828: oscilloscope
1711
+ n03866082: overskirt
1712
+ n03868242: oxcart
1713
+ n03868863: oxygen_mask
1714
+ n03871628: packet
1715
+ n03873416: paddle
1716
+ n03874293: paddlewheel
1717
+ n03874599: padlock
1718
+ n03876231: paintbrush
1719
+ n03877472: pajama
1720
+ n03877845: palace
1721
+ n03884397: panpipe
1722
+ n03887697: paper_towel
1723
+ n03888257: parachute
1724
+ n03888605: parallel_bars
1725
+ n03891251: park_bench
1726
+ n03891332: parking_meter
1727
+ n03895866: passenger_car
1728
+ n03899768: patio
1729
+ n03902125: pay-phone
1730
+ n03903868: pedestal
1731
+ n03908618: pencil_box
1732
+ n03908714: pencil_sharpener
1733
+ n03916031: perfume
1734
+ n03920288: Petri_dish
1735
+ n03924679: photocopier
1736
+ n03929660: pick
1737
+ n03929855: pickelhaube
1738
+ n03930313: picket_fence
1739
+ n03930630: pickup
1740
+ n03933933: pier
1741
+ n03935335: piggy_bank
1742
+ n03937543: pill_bottle
1743
+ n03938244: pillow
1744
+ n03942813: ping-pong_ball
1745
+ n03944341: pinwheel
1746
+ n03947888: pirate
1747
+ n03950228: pitcher
1748
+ n03954731: plane
1749
+ n03956157: planetarium
1750
+ n03958227: plastic_bag
1751
+ n03961711: plate_rack
1752
+ n03967562: plow
1753
+ n03970156: plunger
1754
+ n03976467: Polaroid_camera
1755
+ n03976657: pole
1756
+ n03977966: police_van
1757
+ n03980874: poncho
1758
+ n03982430: pool_table
1759
+ n03983396: pop_bottle
1760
+ n03991062: pot
1761
+ n03992509: potter's_wheel
1762
+ n03995372: power_drill
1763
+ n03998194: prayer_rug
1764
+ n04004767: printer
1765
+ n04005630: prison
1766
+ n04008634: projectile
1767
+ n04009552: projector
1768
+ n04019541: puck
1769
+ n04023962: punching_bag
1770
+ n04026417: purse
1771
+ n04033901: quill
1772
+ n04033995: quilt
1773
+ n04037443: racer
1774
+ n04039381: racket
1775
+ n04040759: radiator
1776
+ n04041544: radio
1777
+ n04044716: radio_telescope
1778
+ n04049303: rain_barrel
1779
+ n04065272: recreational_vehicle
1780
+ n04067472: reel
1781
+ n04069434: reflex_camera
1782
+ n04070727: refrigerator
1783
+ n04074963: remote_control
1784
+ n04081281: restaurant
1785
+ n04086273: revolver
1786
+ n04090263: rifle
1787
+ n04099969: rocking_chair
1788
+ n04111531: rotisserie
1789
+ n04116512: rubber_eraser
1790
+ n04118538: rugby_ball
1791
+ n04118776: rule
1792
+ n04120489: running_shoe
1793
+ n04125021: safe
1794
+ n04127249: safety_pin
1795
+ n04131690: saltshaker
1796
+ n04133789: sandal
1797
+ n04136333: sarong
1798
+ n04141076: sax
1799
+ n04141327: scabbard
1800
+ n04141975: scale
1801
+ n04146614: school_bus
1802
+ n04147183: schooner
1803
+ n04149813: scoreboard
1804
+ n04152593: screen
1805
+ n04153751: screw
1806
+ n04154565: screwdriver
1807
+ n04162706: seat_belt
1808
+ n04179913: sewing_machine
1809
+ n04192698: shield
1810
+ n04200800: shoe_shop
1811
+ n04201297: shoji
1812
+ n04204238: shopping_basket
1813
+ n04204347: shopping_cart
1814
+ n04208210: shovel
1815
+ n04209133: shower_cap
1816
+ n04209239: shower_curtain
1817
+ n04228054: ski
1818
+ n04229816: ski_mask
1819
+ n04235860: sleeping_bag
1820
+ n04238763: slide_rule
1821
+ n04239074: sliding_door
1822
+ n04243546: slot
1823
+ n04251144: snorkel
1824
+ n04252077: snowmobile
1825
+ n04252225: snowplow
1826
+ n04254120: soap_dispenser
1827
+ n04254680: soccer_ball
1828
+ n04254777: sock
1829
+ n04258138: solar_dish
1830
+ n04259630: sombrero
1831
+ n04263257: soup_bowl
1832
+ n04264628: space_bar
1833
+ n04265275: space_heater
1834
+ n04266014: space_shuttle
1835
+ n04270147: spatula
1836
+ n04273569: speedboat
1837
+ n04275548: spider_web
1838
+ n04277352: spindle
1839
+ n04285008: sports_car
1840
+ n04286575: spotlight
1841
+ n04296562: stage
1842
+ n04310018: steam_locomotive
1843
+ n04311004: steel_arch_bridge
1844
+ n04311174: steel_drum
1845
+ n04317175: stethoscope
1846
+ n04325704: stole
1847
+ n04326547: stone_wall
1848
+ n04328186: stopwatch
1849
+ n04330267: stove
1850
+ n04332243: strainer
1851
+ n04335435: streetcar
1852
+ n04336792: stretcher
1853
+ n04344873: studio_couch
1854
+ n04346328: stupa
1855
+ n04347754: submarine
1856
+ n04350905: suit
1857
+ n04355338: sundial
1858
+ n04355933: sunglass
1859
+ n04356056: sunglasses
1860
+ n04357314: sunscreen
1861
+ n04366367: suspension_bridge
1862
+ n04367480: swab
1863
+ n04370456: sweatshirt
1864
+ n04371430: swimming_trunks
1865
+ n04371774: swing
1866
+ n04372370: switch
1867
+ n04376876: syringe
1868
+ n04380533: table_lamp
1869
+ n04389033: tank
1870
+ n04392985: tape_player
1871
+ n04398044: teapot
1872
+ n04399382: teddy
1873
+ n04404412: television
1874
+ n04409515: tennis_ball
1875
+ n04417672: thatch
1876
+ n04418357: theater_curtain
1877
+ n04423845: thimble
1878
+ n04428191: thresher
1879
+ n04429376: throne
1880
+ n04435653: tile_roof
1881
+ n04442312: toaster
1882
+ n04443257: tobacco_shop
1883
+ n04447861: toilet_seat
1884
+ n04456115: torch
1885
+ n04458633: totem_pole
1886
+ n04461696: tow_truck
1887
+ n04462240: toyshop
1888
+ n04465501: tractor
1889
+ n04467665: trailer_truck
1890
+ n04476259: tray
1891
+ n04479046: trench_coat
1892
+ n04482393: tricycle
1893
+ n04483307: trimaran
1894
+ n04485082: tripod
1895
+ n04486054: triumphal_arch
1896
+ n04487081: trolleybus
1897
+ n04487394: trombone
1898
+ n04493381: tub
1899
+ n04501370: turnstile
1900
+ n04505470: typewriter_keyboard
1901
+ n04507155: umbrella
1902
+ n04509417: unicycle
1903
+ n04515003: upright
1904
+ n04517823: vacuum
1905
+ n04522168: vase
1906
+ n04523525: vault
1907
+ n04525038: velvet
1908
+ n04525305: vending_machine
1909
+ n04532106: vestment
1910
+ n04532670: viaduct
1911
+ n04536866: violin
1912
+ n04540053: volleyball
1913
+ n04542943: waffle_iron
1914
+ n04548280: wall_clock
1915
+ n04548362: wallet
1916
+ n04550184: wardrobe
1917
+ n04552348: warplane
1918
+ n04553703: washbasin
1919
+ n04554684: washer
1920
+ n04557648: water_bottle
1921
+ n04560804: water_jug
1922
+ n04562935: water_tower
1923
+ n04579145: whiskey_jug
1924
+ n04579432: whistle
1925
+ n04584207: wig
1926
+ n04589890: window_screen
1927
+ n04590129: window_shade
1928
+ n04591157: Windsor_tie
1929
+ n04591713: wine_bottle
1930
+ n04592741: wing
1931
+ n04596742: wok
1932
+ n04597913: wooden_spoon
1933
+ n04599235: wool
1934
+ n04604644: worm_fence
1935
+ n04606251: wreck
1936
+ n04612504: yawl
1937
+ n04613696: yurt
1938
+ n06359193: web_site
1939
+ n06596364: comic_book
1940
+ n06785654: crossword_puzzle
1941
+ n06794110: street_sign
1942
+ n06874185: traffic_light
1943
+ n07248320: book_jacket
1944
+ n07565083: menu
1945
+ n07579787: plate
1946
+ n07583066: guacamole
1947
+ n07584110: consomme
1948
+ n07590611: hot_pot
1949
+ n07613480: trifle
1950
+ n07614500: ice_cream
1951
+ n07615774: ice_lolly
1952
+ n07684084: French_loaf
1953
+ n07693725: bagel
1954
+ n07695742: pretzel
1955
+ n07697313: cheeseburger
1956
+ n07697537: hotdog
1957
+ n07711569: mashed_potato
1958
+ n07714571: head_cabbage
1959
+ n07714990: broccoli
1960
+ n07715103: cauliflower
1961
+ n07716358: zucchini
1962
+ n07716906: spaghetti_squash
1963
+ n07717410: acorn_squash
1964
+ n07717556: butternut_squash
1965
+ n07718472: cucumber
1966
+ n07718747: artichoke
1967
+ n07720875: bell_pepper
1968
+ n07730033: cardoon
1969
+ n07734744: mushroom
1970
+ n07742313: Granny_Smith
1971
+ n07745940: strawberry
1972
+ n07747607: orange
1973
+ n07749582: lemon
1974
+ n07753113: fig
1975
+ n07753275: pineapple
1976
+ n07753592: banana
1977
+ n07754684: jackfruit
1978
+ n07760859: custard_apple
1979
+ n07768694: pomegranate
1980
+ n07802026: hay
1981
+ n07831146: carbonara
1982
+ n07836838: chocolate_sauce
1983
+ n07860988: dough
1984
+ n07871810: meat_loaf
1985
+ n07873807: pizza
1986
+ n07875152: potpie
1987
+ n07880968: burrito
1988
+ n07892512: red_wine
1989
+ n07920052: espresso
1990
+ n07930864: cup
1991
+ n07932039: eggnog
1992
+ n09193705: alp
1993
+ n09229709: bubble
1994
+ n09246464: cliff
1995
+ n09256479: coral_reef
1996
+ n09288635: geyser
1997
+ n09332890: lakeside
1998
+ n09399592: promontory
1999
+ n09421951: sandbar
2000
+ n09428293: seashore
2001
+ n09468604: valley
2002
+ n09472597: volcano
2003
+ n09835506: ballplayer
2004
+ n10148035: groom
2005
+ n10565667: scuba_diver
2006
+ n11879895: rapeseed
2007
+ n11939491: daisy
2008
+ n12057211: yellow_lady's_slipper
2009
+ n12144580: corn
2010
+ n12267677: acorn
2011
+ n12620546: hip
2012
+ n12768682: buckeye
2013
+ n12985857: coral_fungus
2014
+ n12998815: agaric
2015
+ n13037406: gyromitra
2016
+ n13040303: stinkhorn
2017
+ n13044778: earthstar
2018
+ n13052670: hen-of-the-woods
2019
+ n13054560: bolete
2020
+ n13133613: ear
2021
+ n15075141: toilet_tissue
2022
+
2023
+
2024
+ # Download script/URL (optional)
2025
+ download: yolo/data/scripts/get_imagenet.sh
ultralytics/cfg/datasets/Objects365.yaml ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Objects365 dataset https://www.objects365.org/ by Megvii
3
+ # Example usage: yolo train data=Objects365.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/Objects365 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 1742289 images
13
+ val: images/val # val images (relative to 'path') 80000 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: Person
19
+ 1: Sneakers
20
+ 2: Chair
21
+ 3: Other Shoes
22
+ 4: Hat
23
+ 5: Car
24
+ 6: Lamp
25
+ 7: Glasses
26
+ 8: Bottle
27
+ 9: Desk
28
+ 10: Cup
29
+ 11: Street Lights
30
+ 12: Cabinet/shelf
31
+ 13: Handbag/Satchel
32
+ 14: Bracelet
33
+ 15: Plate
34
+ 16: Picture/Frame
35
+ 17: Helmet
36
+ 18: Book
37
+ 19: Gloves
38
+ 20: Storage box
39
+ 21: Boat
40
+ 22: Leather Shoes
41
+ 23: Flower
42
+ 24: Bench
43
+ 25: Potted Plant
44
+ 26: Bowl/Basin
45
+ 27: Flag
46
+ 28: Pillow
47
+ 29: Boots
48
+ 30: Vase
49
+ 31: Microphone
50
+ 32: Necklace
51
+ 33: Ring
52
+ 34: SUV
53
+ 35: Wine Glass
54
+ 36: Belt
55
+ 37: Monitor/TV
56
+ 38: Backpack
57
+ 39: Umbrella
58
+ 40: Traffic Light
59
+ 41: Speaker
60
+ 42: Watch
61
+ 43: Tie
62
+ 44: Trash bin Can
63
+ 45: Slippers
64
+ 46: Bicycle
65
+ 47: Stool
66
+ 48: Barrel/bucket
67
+ 49: Van
68
+ 50: Couch
69
+ 51: Sandals
70
+ 52: Basket
71
+ 53: Drum
72
+ 54: Pen/Pencil
73
+ 55: Bus
74
+ 56: Wild Bird
75
+ 57: High Heels
76
+ 58: Motorcycle
77
+ 59: Guitar
78
+ 60: Carpet
79
+ 61: Cell Phone
80
+ 62: Bread
81
+ 63: Camera
82
+ 64: Canned
83
+ 65: Truck
84
+ 66: Traffic cone
85
+ 67: Cymbal
86
+ 68: Lifesaver
87
+ 69: Towel
88
+ 70: Stuffed Toy
89
+ 71: Candle
90
+ 72: Sailboat
91
+ 73: Laptop
92
+ 74: Awning
93
+ 75: Bed
94
+ 76: Faucet
95
+ 77: Tent
96
+ 78: Horse
97
+ 79: Mirror
98
+ 80: Power outlet
99
+ 81: Sink
100
+ 82: Apple
101
+ 83: Air Conditioner
102
+ 84: Knife
103
+ 85: Hockey Stick
104
+ 86: Paddle
105
+ 87: Pickup Truck
106
+ 88: Fork
107
+ 89: Traffic Sign
108
+ 90: Balloon
109
+ 91: Tripod
110
+ 92: Dog
111
+ 93: Spoon
112
+ 94: Clock
113
+ 95: Pot
114
+ 96: Cow
115
+ 97: Cake
116
+ 98: Dinning Table
117
+ 99: Sheep
118
+ 100: Hanger
119
+ 101: Blackboard/Whiteboard
120
+ 102: Napkin
121
+ 103: Other Fish
122
+ 104: Orange/Tangerine
123
+ 105: Toiletry
124
+ 106: Keyboard
125
+ 107: Tomato
126
+ 108: Lantern
127
+ 109: Machinery Vehicle
128
+ 110: Fan
129
+ 111: Green Vegetables
130
+ 112: Banana
131
+ 113: Baseball Glove
132
+ 114: Airplane
133
+ 115: Mouse
134
+ 116: Train
135
+ 117: Pumpkin
136
+ 118: Soccer
137
+ 119: Skiboard
138
+ 120: Luggage
139
+ 121: Nightstand
140
+ 122: Tea pot
141
+ 123: Telephone
142
+ 124: Trolley
143
+ 125: Head Phone
144
+ 126: Sports Car
145
+ 127: Stop Sign
146
+ 128: Dessert
147
+ 129: Scooter
148
+ 130: Stroller
149
+ 131: Crane
150
+ 132: Remote
151
+ 133: Refrigerator
152
+ 134: Oven
153
+ 135: Lemon
154
+ 136: Duck
155
+ 137: Baseball Bat
156
+ 138: Surveillance Camera
157
+ 139: Cat
158
+ 140: Jug
159
+ 141: Broccoli
160
+ 142: Piano
161
+ 143: Pizza
162
+ 144: Elephant
163
+ 145: Skateboard
164
+ 146: Surfboard
165
+ 147: Gun
166
+ 148: Skating and Skiing shoes
167
+ 149: Gas stove
168
+ 150: Donut
169
+ 151: Bow Tie
170
+ 152: Carrot
171
+ 153: Toilet
172
+ 154: Kite
173
+ 155: Strawberry
174
+ 156: Other Balls
175
+ 157: Shovel
176
+ 158: Pepper
177
+ 159: Computer Box
178
+ 160: Toilet Paper
179
+ 161: Cleaning Products
180
+ 162: Chopsticks
181
+ 163: Microwave
182
+ 164: Pigeon
183
+ 165: Baseball
184
+ 166: Cutting/chopping Board
185
+ 167: Coffee Table
186
+ 168: Side Table
187
+ 169: Scissors
188
+ 170: Marker
189
+ 171: Pie
190
+ 172: Ladder
191
+ 173: Snowboard
192
+ 174: Cookies
193
+ 175: Radiator
194
+ 176: Fire Hydrant
195
+ 177: Basketball
196
+ 178: Zebra
197
+ 179: Grape
198
+ 180: Giraffe
199
+ 181: Potato
200
+ 182: Sausage
201
+ 183: Tricycle
202
+ 184: Violin
203
+ 185: Egg
204
+ 186: Fire Extinguisher
205
+ 187: Candy
206
+ 188: Fire Truck
207
+ 189: Billiards
208
+ 190: Converter
209
+ 191: Bathtub
210
+ 192: Wheelchair
211
+ 193: Golf Club
212
+ 194: Briefcase
213
+ 195: Cucumber
214
+ 196: Cigar/Cigarette
215
+ 197: Paint Brush
216
+ 198: Pear
217
+ 199: Heavy Truck
218
+ 200: Hamburger
219
+ 201: Extractor
220
+ 202: Extension Cord
221
+ 203: Tong
222
+ 204: Tennis Racket
223
+ 205: Folder
224
+ 206: American Football
225
+ 207: earphone
226
+ 208: Mask
227
+ 209: Kettle
228
+ 210: Tennis
229
+ 211: Ship
230
+ 212: Swing
231
+ 213: Coffee Machine
232
+ 214: Slide
233
+ 215: Carriage
234
+ 216: Onion
235
+ 217: Green beans
236
+ 218: Projector
237
+ 219: Frisbee
238
+ 220: Washing Machine/Drying Machine
239
+ 221: Chicken
240
+ 222: Printer
241
+ 223: Watermelon
242
+ 224: Saxophone
243
+ 225: Tissue
244
+ 226: Toothbrush
245
+ 227: Ice cream
246
+ 228: Hot-air balloon
247
+ 229: Cello
248
+ 230: French Fries
249
+ 231: Scale
250
+ 232: Trophy
251
+ 233: Cabbage
252
+ 234: Hot dog
253
+ 235: Blender
254
+ 236: Peach
255
+ 237: Rice
256
+ 238: Wallet/Purse
257
+ 239: Volleyball
258
+ 240: Deer
259
+ 241: Goose
260
+ 242: Tape
261
+ 243: Tablet
262
+ 244: Cosmetics
263
+ 245: Trumpet
264
+ 246: Pineapple
265
+ 247: Golf Ball
266
+ 248: Ambulance
267
+ 249: Parking meter
268
+ 250: Mango
269
+ 251: Key
270
+ 252: Hurdle
271
+ 253: Fishing Rod
272
+ 254: Medal
273
+ 255: Flute
274
+ 256: Brush
275
+ 257: Penguin
276
+ 258: Megaphone
277
+ 259: Corn
278
+ 260: Lettuce
279
+ 261: Garlic
280
+ 262: Swan
281
+ 263: Helicopter
282
+ 264: Green Onion
283
+ 265: Sandwich
284
+ 266: Nuts
285
+ 267: Speed Limit Sign
286
+ 268: Induction Cooker
287
+ 269: Broom
288
+ 270: Trombone
289
+ 271: Plum
290
+ 272: Rickshaw
291
+ 273: Goldfish
292
+ 274: Kiwi fruit
293
+ 275: Router/modem
294
+ 276: Poker Card
295
+ 277: Toaster
296
+ 278: Shrimp
297
+ 279: Sushi
298
+ 280: Cheese
299
+ 281: Notepaper
300
+ 282: Cherry
301
+ 283: Pliers
302
+ 284: CD
303
+ 285: Pasta
304
+ 286: Hammer
305
+ 287: Cue
306
+ 288: Avocado
307
+ 289: Hamimelon
308
+ 290: Flask
309
+ 291: Mushroom
310
+ 292: Screwdriver
311
+ 293: Soap
312
+ 294: Recorder
313
+ 295: Bear
314
+ 296: Eggplant
315
+ 297: Board Eraser
316
+ 298: Coconut
317
+ 299: Tape Measure/Ruler
318
+ 300: Pig
319
+ 301: Showerhead
320
+ 302: Globe
321
+ 303: Chips
322
+ 304: Steak
323
+ 305: Crosswalk Sign
324
+ 306: Stapler
325
+ 307: Camel
326
+ 308: Formula 1
327
+ 309: Pomegranate
328
+ 310: Dishwasher
329
+ 311: Crab
330
+ 312: Hoverboard
331
+ 313: Meat ball
332
+ 314: Rice Cooker
333
+ 315: Tuba
334
+ 316: Calculator
335
+ 317: Papaya
336
+ 318: Antelope
337
+ 319: Parrot
338
+ 320: Seal
339
+ 321: Butterfly
340
+ 322: Dumbbell
341
+ 323: Donkey
342
+ 324: Lion
343
+ 325: Urinal
344
+ 326: Dolphin
345
+ 327: Electric Drill
346
+ 328: Hair Dryer
347
+ 329: Egg tart
348
+ 330: Jellyfish
349
+ 331: Treadmill
350
+ 332: Lighter
351
+ 333: Grapefruit
352
+ 334: Game board
353
+ 335: Mop
354
+ 336: Radish
355
+ 337: Baozi
356
+ 338: Target
357
+ 339: French
358
+ 340: Spring Rolls
359
+ 341: Monkey
360
+ 342: Rabbit
361
+ 343: Pencil Case
362
+ 344: Yak
363
+ 345: Red Cabbage
364
+ 346: Binoculars
365
+ 347: Asparagus
366
+ 348: Barbell
367
+ 349: Scallop
368
+ 350: Noddles
369
+ 351: Comb
370
+ 352: Dumpling
371
+ 353: Oyster
372
+ 354: Table Tennis paddle
373
+ 355: Cosmetics Brush/Eyeliner Pencil
374
+ 356: Chainsaw
375
+ 357: Eraser
376
+ 358: Lobster
377
+ 359: Durian
378
+ 360: Okra
379
+ 361: Lipstick
380
+ 362: Cosmetics Mirror
381
+ 363: Curling
382
+ 364: Table Tennis
383
+
384
+
385
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
386
+ download: |
387
+ from tqdm import tqdm
388
+
389
+ from ultralytics.utils.checks import check_requirements
390
+ from ultralytics.utils.downloads import download
391
+ from ultralytics.utils.ops import xyxy2xywhn
392
+
393
+ import numpy as np
394
+ from pathlib import Path
395
+
396
+ check_requirements(('pycocotools>=2.0',))
397
+ from pycocotools.coco import COCO
398
+
399
+ # Make Directories
400
+ dir = Path(yaml['path']) # dataset root dir
401
+ for p in 'images', 'labels':
402
+ (dir / p).mkdir(parents=True, exist_ok=True)
403
+ for q in 'train', 'val':
404
+ (dir / p / q).mkdir(parents=True, exist_ok=True)
405
+
406
+ # Train, Val Splits
407
+ for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
408
+ print(f"Processing {split} in {patches} patches ...")
409
+ images, labels = dir / 'images' / split, dir / 'labels' / split
410
+
411
+ # Download
412
+ url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
413
+ if split == 'train':
414
+ download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir) # annotations json
415
+ download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
416
+ elif split == 'val':
417
+ download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir) # annotations json
418
+ download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
419
+ download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)
420
+
421
+ # Move
422
+ for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
423
+ f.rename(images / f.name) # move to /images/{split}
424
+
425
+ # Labels
426
+ coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
427
+ names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
428
+ for cid, cat in enumerate(names):
429
+ catIds = coco.getCatIds(catNms=[cat])
430
+ imgIds = coco.getImgIds(catIds=catIds)
431
+ for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
432
+ width, height = im["width"], im["height"]
433
+ path = Path(im["file_name"]) # image filename
434
+ try:
435
+ with open(labels / path.with_suffix('.txt').name, 'a') as file:
436
+ annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
437
+ for a in coco.loadAnns(annIds):
438
+ x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
439
+ xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
440
+ x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
441
+ file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
442
+ except Exception as e:
443
+ print(e)
ultralytics/cfg/datasets/SKU-110K.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
3
+ # Example usage: yolo train data=SKU-110K.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── SKU-110K ← downloads here (13.6 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/SKU-110K # dataset root dir
12
+ train: train.txt # train images (relative to 'path') 8219 images
13
+ val: val.txt # val images (relative to 'path') 588 images
14
+ test: test.txt # test images (optional) 2936 images
15
+
16
+ # Classes
17
+ names:
18
+ 0: object
19
+
20
+
21
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
22
+ download: |
23
"""SKU-110K download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

from ultralytics.utils.downloads import download
from ultralytics.utils.ops import xyxy2xywh

# Fetch the archive into the parent of the dataset root
dir = Path(yaml['path'])  # dataset root dir
parent = Path(dir.parent)  # download dir
download(['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'], dir=parent)

# Rename the extracted folder to the expected dataset root and prepare a labels dir
if dir.exists():
    shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir)  # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir

# Convert the CSV annotations to per-image YOLO label files plus split image lists
columns = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
for csv_name in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
    rows = pd.read_csv(dir / 'annotations' / csv_name, names=columns).values  # annotations
    image_col, unique_images = rows[:, 0], np.unique(rows[:, 0])
    # e.g. annotations_train.csv -> <root>/train.txt listing every image once
    list_file = str((dir / csv_name).with_suffix('.txt')).replace('annotations_', '')
    with open(list_file, 'w') as f:
        f.writelines(f'./images/{s}\n' for s in unique_images)
    for im in tqdm(unique_images, desc=f'Converting {dir / csv_name}'):
        cls = 0  # single-class dataset
        with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
            for r in rows[image_col == im]:
                w, h = r[6], r[7]  # image width, height
                # normalize the pixel xyxy box, then convert to YOLO xywh
                xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]
                f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
ultralytics/cfg/datasets/VOC.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
3
+ # Example usage: yolo train data=VOC.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── VOC ← downloads here (2.8 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/VOC
12
+ train: # train images (relative to 'path') 16551 images
13
+ - images/train2012
14
+ - images/train2007
15
+ - images/val2012
16
+ - images/val2007
17
+ val: # val images (relative to 'path') 4952 images
18
+ - images/test2007
19
+ test: # test images (optional)
20
+ - images/test2007
21
+
22
+ # Classes
23
+ names:
24
+ 0: aeroplane
25
+ 1: bicycle
26
+ 2: bird
27
+ 3: boat
28
+ 4: bottle
29
+ 5: bus
30
+ 6: car
31
+ 7: cat
32
+ 8: chair
33
+ 9: cow
34
+ 10: diningtable
35
+ 11: dog
36
+ 12: horse
37
+ 13: motorbike
38
+ 14: person
39
+ 15: pottedplant
40
+ 16: sheep
41
+ 17: sofa
42
+ 18: train
43
+ 19: tvmonitor
44
+
45
+
46
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
47
+ download: |
48
"""PASCAL VOC download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import xml.etree.ElementTree as ET
from pathlib import Path

from tqdm import tqdm
from ultralytics.utils.downloads import download


def convert_label(path, lb_path, year, image_id):
    """Convert one VOC XML annotation file to a YOLO-format label file at `lb_path`."""

    def convert_box(size, box):
        # VOC gives (xmin, xmax, ymin, ymax) in pixels; return normalized xywh.
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Context managers ensure both files are closed even if parsing fails
    # (the original opened them and never closed either).
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        names = list(yaml['names'].values())  # names list
        for obj in root.iter('object'):
            cls = obj.find('name').text
            # Skip objects flagged 'difficult' and any class not in the dataset.
            if cls in names and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n')


# Download
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
        f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
        f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
download(urls, dir=dir / 'images', curl=True, threads=3)

# Convert: move images into images/<set><year> and write YOLO labels alongside
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
    imgs_path = dir / 'images' / f'{image_set}{year}'
    lbs_path = dir / 'labels' / f'{image_set}{year}'
    imgs_path.mkdir(exist_ok=True, parents=True)
    lbs_path.mkdir(exist_ok=True, parents=True)

    with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
        image_ids = f.read().strip().split()
    # `image_id` replaces the original loop variable `id` (shadowed builtin);
    # `img` replaces the original reuse of `f` for both file handle and image path.
    for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):
        img = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
        lb_path = (lbs_path / img.name).with_suffix('.txt')  # new label path
        img.rename(imgs_path / img.name)  # move image
        convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
ultralytics/cfg/datasets/VisDrone.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
3
+ # Example usage: yolo train data=VisDrone.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── VisDrone ← downloads here (2.3 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/VisDrone # dataset root dir
12
+ train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
13
+ val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
14
+ test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
15
+
16
+ # Classes
17
+ names:
18
+ 0: pedestrian
19
+ 1: people
20
+ 2: bicycle
21
+ 3: car
22
+ 4: van
23
+ 5: truck
24
+ 6: tricycle
25
+ 7: awning-tricycle
26
+ 8: bus
27
+ 9: motor
28
+
29
+
30
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
31
+ download: |
32
"""VisDrone download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import os
from pathlib import Path

from ultralytics.utils.downloads import download


def visdrone2yolo(dir):
    """Convert every VisDrone annotation .txt under `dir/annotations` to a YOLO label file under `dir/labels`."""
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box (left, top, width, height in pixels) to normalized YOLO xywh
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
    for ann_file in tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}'):
        # the matching image shares the annotation file's stem
        img_size = Image.open((dir / 'images' / ann_file.name).with_suffix('.jpg')).size
        lines = []
        with open(ann_file, 'r') as src:  # read annotation.txt
            for row in (line.split(',') for line in src.read().strip().splitlines()):
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
        label_path = str(ann_file).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}')
        with open(label_path, 'w') as dst:
            dst.writelines(lines)  # write label.txt


# Download
dir = Path(yaml['path'])  # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, curl=True, threads=4)

# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
ultralytics/cfg/datasets/coco-pose.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO 2017 dataset http://cocodataset.org by Microsoft
3
+ # Example usage: yolo train data=coco-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco-pose ← downloads here (20.1 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco-pose # dataset root dir
12
+ train: train2017.txt # train images (relative to 'path') 118287 images
13
+ val: val2017.txt # val images (relative to 'path') 5000 images
14
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
15
+
16
+ # Keypoints
17
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
18
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
19
+
20
+ # Classes
21
+ names:
22
+ 0: person
23
+
24
+ # Download script/URL (optional)
25
+ download: |
26
# Download the COCO-pose labels and the COCO 2017 images
# (runs inside the Ultralytics `download:` hook; `yaml` is the injected dataset dict).
from ultralytics.utils.downloads import download
from pathlib import Path

dir = Path(yaml['path'])  # dataset root dir

# Labels (pose annotations pre-converted to YOLO format)
label_urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-pose.zip']
download(label_urls, dir=dir.parent)

# Images (shared with the COCO detection dataset)
image_urls = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_urls, dir=dir / 'images', threads=3)
ultralytics/cfg/datasets/coco.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO 2017 dataset http://cocodataset.org by Microsoft
3
+ # Example usage: yolo train data=coco.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco ← downloads here (20.1 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco # dataset root dir
12
+ train: train2017.txt # train images (relative to 'path') 118287 images
13
+ val: val2017.txt # val images (relative to 'path') 5000 images
14
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: |
102
# Download COCO 2017 labels and images
# (runs inside the Ultralytics `download:` hook; `yaml` is the injected dataset dict).
from ultralytics.utils.downloads import download
from pathlib import Path

segments = True  # segment or box labels
dir = Path(yaml['path'])  # dataset root dir

# Labels: segment-polygon or plain-box variant, chosen by the flag above
base = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
label_zip = 'coco2017labels-segments.zip' if segments else 'coco2017labels.zip'
download([base + label_zip], dir=dir.parent)

# Images
image_urls = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_urls, dir=dir / 'images', threads=3)
ultralytics/cfg/datasets/coco128-seg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco128-seg.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco128-seg ← downloads here (7 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco128-seg # dataset root dir
12
+ train: images/train2017 # train images (relative to 'path') 128 images
13
+ val: images/train2017 # val images (relative to 'path') 128 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco128-seg.zip
ultralytics/cfg/datasets/coco128.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco128.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco128 ← downloads here (7 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco128 # dataset root dir
12
+ train: images/train2017 # train images (relative to 'path') 128 images
13
+ val: images/train2017 # val images (relative to 'path') 128 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco128.zip
ultralytics/cfg/datasets/coco8-pose.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8-pose ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8-pose # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Keypoints
17
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
18
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
19
+
20
+ # Classes
21
+ names:
22
+ 0: person
23
+
24
+ # Download script/URL (optional)
25
+ download: https://ultralytics.com/assets/coco8-pose.zip
ultralytics/cfg/datasets/coco8-seg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8-seg.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8-seg ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8-seg # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco8-seg.zip
ultralytics/cfg/datasets/coco8.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8 ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco8.zip
ultralytics/cfg/datasets/open-images-v7.yaml ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
3
+ # Example usage: yolo train data=open-images-v7.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── open-images-v7 ← downloads here (561 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/open-images-v7 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 1743042 images
13
+ val: images/val # val images (relative to 'path') 41620 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: Accordion
19
+ 1: Adhesive tape
20
+ 2: Aircraft
21
+ 3: Airplane
22
+ 4: Alarm clock
23
+ 5: Alpaca
24
+ 6: Ambulance
25
+ 7: Animal
26
+ 8: Ant
27
+ 9: Antelope
28
+ 10: Apple
29
+ 11: Armadillo
30
+ 12: Artichoke
31
+ 13: Auto part
32
+ 14: Axe
33
+ 15: Backpack
34
+ 16: Bagel
35
+ 17: Baked goods
36
+ 18: Balance beam
37
+ 19: Ball
38
+ 20: Balloon
39
+ 21: Banana
40
+ 22: Band-aid
41
+ 23: Banjo
42
+ 24: Barge
43
+ 25: Barrel
44
+ 26: Baseball bat
45
+ 27: Baseball glove
46
+ 28: Bat (Animal)
47
+ 29: Bathroom accessory
48
+ 30: Bathroom cabinet
49
+ 31: Bathtub
50
+ 32: Beaker
51
+ 33: Bear
52
+ 34: Bed
53
+ 35: Bee
54
+ 36: Beehive
55
+ 37: Beer
56
+ 38: Beetle
57
+ 39: Bell pepper
58
+ 40: Belt
59
+ 41: Bench
60
+ 42: Bicycle
61
+ 43: Bicycle helmet
62
+ 44: Bicycle wheel
63
+ 45: Bidet
64
+ 46: Billboard
65
+ 47: Billiard table
66
+ 48: Binoculars
67
+ 49: Bird
68
+ 50: Blender
69
+ 51: Blue jay
70
+ 52: Boat
71
+ 53: Bomb
72
+ 54: Book
73
+ 55: Bookcase
74
+ 56: Boot
75
+ 57: Bottle
76
+ 58: Bottle opener
77
+ 59: Bow and arrow
78
+ 60: Bowl
79
+ 61: Bowling equipment
80
+ 62: Box
81
+ 63: Boy
82
+ 64: Brassiere
83
+ 65: Bread
84
+ 66: Briefcase
85
+ 67: Broccoli
86
+ 68: Bronze sculpture
87
+ 69: Brown bear
88
+ 70: Building
89
+ 71: Bull
90
+ 72: Burrito
91
+ 73: Bus
92
+ 74: Bust
93
+ 75: Butterfly
94
+ 76: Cabbage
95
+ 77: Cabinetry
96
+ 78: Cake
97
+ 79: Cake stand
98
+ 80: Calculator
99
+ 81: Camel
100
+ 82: Camera
101
+ 83: Can opener
102
+ 84: Canary
103
+ 85: Candle
104
+ 86: Candy
105
+ 87: Cannon
106
+ 88: Canoe
107
+ 89: Cantaloupe
108
+ 90: Car
109
+ 91: Carnivore
110
+ 92: Carrot
111
+ 93: Cart
112
+ 94: Cassette deck
113
+ 95: Castle
114
+ 96: Cat
115
+ 97: Cat furniture
116
+ 98: Caterpillar
117
+ 99: Cattle
118
+ 100: Ceiling fan
119
+ 101: Cello
120
+ 102: Centipede
121
+ 103: Chainsaw
122
+ 104: Chair
123
+ 105: Cheese
124
+ 106: Cheetah
125
+ 107: Chest of drawers
126
+ 108: Chicken
127
+ 109: Chime
128
+ 110: Chisel
129
+ 111: Chopsticks
130
+ 112: Christmas tree
131
+ 113: Clock
132
+ 114: Closet
133
+ 115: Clothing
134
+ 116: Coat
135
+ 117: Cocktail
136
+ 118: Cocktail shaker
137
+ 119: Coconut
138
+ 120: Coffee
139
+ 121: Coffee cup
140
+ 122: Coffee table
141
+ 123: Coffeemaker
142
+ 124: Coin
143
+ 125: Common fig
144
+ 126: Common sunflower
145
+ 127: Computer keyboard
146
+ 128: Computer monitor
147
+ 129: Computer mouse
148
+ 130: Container
149
+ 131: Convenience store
150
+ 132: Cookie
151
+ 133: Cooking spray
152
+ 134: Corded phone
153
+ 135: Cosmetics
154
+ 136: Couch
155
+ 137: Countertop
156
+ 138: Cowboy hat
157
+ 139: Crab
158
+ 140: Cream
159
+ 141: Cricket ball
160
+ 142: Crocodile
161
+ 143: Croissant
162
+ 144: Crown
163
+ 145: Crutch
164
+ 146: Cucumber
165
+ 147: Cupboard
166
+ 148: Curtain
167
+ 149: Cutting board
168
+ 150: Dagger
169
+ 151: Dairy Product
170
+ 152: Deer
171
+ 153: Desk
172
+ 154: Dessert
173
+ 155: Diaper
174
+ 156: Dice
175
+ 157: Digital clock
176
+ 158: Dinosaur
177
+ 159: Dishwasher
178
+ 160: Dog
179
+ 161: Dog bed
180
+ 162: Doll
181
+ 163: Dolphin
182
+ 164: Door
183
+ 165: Door handle
184
+ 166: Doughnut
185
+ 167: Dragonfly
186
+ 168: Drawer
187
+ 169: Dress
188
+ 170: Drill (Tool)
189
+ 171: Drink
190
+ 172: Drinking straw
191
+ 173: Drum
192
+ 174: Duck
193
+ 175: Dumbbell
194
+ 176: Eagle
195
+ 177: Earrings
196
+ 178: Egg (Food)
197
+ 179: Elephant
198
+ 180: Envelope
199
+ 181: Eraser
200
+ 182: Face powder
201
+ 183: Facial tissue holder
202
+ 184: Falcon
203
+ 185: Fashion accessory
204
+ 186: Fast food
205
+ 187: Fax
206
+ 188: Fedora
207
+ 189: Filing cabinet
208
+ 190: Fire hydrant
209
+ 191: Fireplace
210
+ 192: Fish
211
+ 193: Flag
212
+ 194: Flashlight
213
+ 195: Flower
214
+ 196: Flowerpot
215
+ 197: Flute
216
+ 198: Flying disc
217
+ 199: Food
218
+ 200: Food processor
219
+ 201: Football
220
+ 202: Football helmet
221
+ 203: Footwear
222
+ 204: Fork
223
+ 205: Fountain
224
+ 206: Fox
225
+ 207: French fries
226
+ 208: French horn
227
+ 209: Frog
228
+ 210: Fruit
229
+ 211: Frying pan
230
+ 212: Furniture
231
+ 213: Garden Asparagus
232
+ 214: Gas stove
233
+ 215: Giraffe
234
+ 216: Girl
235
+ 217: Glasses
236
+ 218: Glove
237
+ 219: Goat
238
+ 220: Goggles
239
+ 221: Goldfish
240
+ 222: Golf ball
241
+ 223: Golf cart
242
+ 224: Gondola
243
+ 225: Goose
244
+ 226: Grape
245
+ 227: Grapefruit
246
+ 228: Grinder
247
+ 229: Guacamole
248
+ 230: Guitar
249
+ 231: Hair dryer
250
+ 232: Hair spray
251
+ 233: Hamburger
252
+ 234: Hammer
253
+ 235: Hamster
254
+ 236: Hand dryer
255
+ 237: Handbag
256
+ 238: Handgun
257
+ 239: Harbor seal
258
+ 240: Harmonica
259
+ 241: Harp
260
+ 242: Harpsichord
261
+ 243: Hat
262
+ 244: Headphones
263
+ 245: Heater
264
+ 246: Hedgehog
265
+ 247: Helicopter
266
+ 248: Helmet
267
+ 249: High heels
268
+ 250: Hiking equipment
269
+ 251: Hippopotamus
270
+ 252: Home appliance
271
+ 253: Honeycomb
272
+ 254: Horizontal bar
273
+ 255: Horse
274
+ 256: Hot dog
275
+ 257: House
276
+ 258: Houseplant
277
+ 259: Human arm
278
+ 260: Human beard
279
+ 261: Human body
280
+ 262: Human ear
281
+ 263: Human eye
282
+ 264: Human face
283
+ 265: Human foot
284
+ 266: Human hair
285
+ 267: Human hand
286
+ 268: Human head
287
+ 269: Human leg
288
+ 270: Human mouth
289
+ 271: Human nose
290
+ 272: Humidifier
291
+ 273: Ice cream
292
+ 274: Indoor rower
293
+ 275: Infant bed
294
+ 276: Insect
295
+ 277: Invertebrate
296
+ 278: Ipod
297
+ 279: Isopod
298
+ 280: Jacket
299
+ 281: Jacuzzi
300
+ 282: Jaguar (Animal)
301
+ 283: Jeans
302
+ 284: Jellyfish
303
+ 285: Jet ski
304
+ 286: Jug
305
+ 287: Juice
306
+ 288: Kangaroo
307
+ 289: Kettle
308
+ 290: Kitchen & dining room table
309
+ 291: Kitchen appliance
310
+ 292: Kitchen knife
311
+ 293: Kitchen utensil
312
+ 294: Kitchenware
313
+ 295: Kite
314
+ 296: Knife
315
+ 297: Koala
316
+ 298: Ladder
317
+ 299: Ladle
318
+ 300: Ladybug
319
+ 301: Lamp
320
+ 302: Land vehicle
321
+ 303: Lantern
322
+ 304: Laptop
323
+ 305: Lavender (Plant)
324
+ 306: Lemon
325
+ 307: Leopard
326
+ 308: Light bulb
327
+ 309: Light switch
328
+ 310: Lighthouse
329
+ 311: Lily
330
+ 312: Limousine
331
+ 313: Lion
332
+ 314: Lipstick
333
+ 315: Lizard
334
+ 316: Lobster
335
+ 317: Loveseat
336
+ 318: Luggage and bags
337
+ 319: Lynx
338
+ 320: Magpie
339
+ 321: Mammal
340
+ 322: Man
341
+ 323: Mango
342
+ 324: Maple
343
+ 325: Maracas
344
+ 326: Marine invertebrates
345
+ 327: Marine mammal
346
+ 328: Measuring cup
347
+ 329: Mechanical fan
348
+ 330: Medical equipment
349
+ 331: Microphone
350
+ 332: Microwave oven
351
+ 333: Milk
352
+ 334: Miniskirt
353
+ 335: Mirror
354
+ 336: Missile
355
+ 337: Mixer
356
+ 338: Mixing bowl
357
+ 339: Mobile phone
358
+ 340: Monkey
359
+ 341: Moths and butterflies
360
+ 342: Motorcycle
361
+ 343: Mouse
362
+ 344: Muffin
363
+ 345: Mug
364
+ 346: Mule
365
+ 347: Mushroom
366
+ 348: Musical instrument
367
+ 349: Musical keyboard
368
+ 350: Nail (Construction)
369
+ 351: Necklace
370
+ 352: Nightstand
371
+ 353: Oboe
372
+ 354: Office building
373
+ 355: Office supplies
374
+ 356: Orange
375
+ 357: Organ (Musical Instrument)
376
+ 358: Ostrich
377
+ 359: Otter
378
+ 360: Oven
379
+ 361: Owl
380
+ 362: Oyster
381
+ 363: Paddle
382
+ 364: Palm tree
383
+ 365: Pancake
384
+ 366: Panda
385
+ 367: Paper cutter
386
+ 368: Paper towel
387
+ 369: Parachute
388
+ 370: Parking meter
389
+ 371: Parrot
390
+ 372: Pasta
391
+ 373: Pastry
392
+ 374: Peach
393
+ 375: Pear
394
+ 376: Pen
395
+ 377: Pencil case
396
+ 378: Pencil sharpener
397
+ 379: Penguin
398
+ 380: Perfume
399
+ 381: Person
400
+ 382: Personal care
401
+ 383: Personal flotation device
402
+ 384: Piano
403
+ 385: Picnic basket
404
+ 386: Picture frame
405
+ 387: Pig
406
+ 388: Pillow
407
+ 389: Pineapple
408
+ 390: Pitcher (Container)
409
+ 391: Pizza
410
+ 392: Pizza cutter
411
+ 393: Plant
412
+ 394: Plastic bag
413
+ 395: Plate
414
+ 396: Platter
415
+ 397: Plumbing fixture
416
+ 398: Polar bear
417
+ 399: Pomegranate
418
+ 400: Popcorn
419
+ 401: Porch
420
+ 402: Porcupine
421
+ 403: Poster
422
+ 404: Potato
423
+ 405: Power plugs and sockets
424
+ 406: Pressure cooker
425
+ 407: Pretzel
426
+ 408: Printer
427
+ 409: Pumpkin
428
+ 410: Punching bag
429
+ 411: Rabbit
430
+ 412: Raccoon
431
+ 413: Racket
432
+ 414: Radish
433
+ 415: Ratchet (Device)
434
+ 416: Raven
435
+ 417: Rays and skates
436
+ 418: Red panda
437
+ 419: Refrigerator
438
+ 420: Remote control
439
+ 421: Reptile
440
+ 422: Rhinoceros
441
+ 423: Rifle
442
+ 424: Ring binder
443
+ 425: Rocket
444
+ 426: Roller skates
445
+ 427: Rose
446
+ 428: Rugby ball
447
+ 429: Ruler
448
+ 430: Salad
449
+ 431: Salt and pepper shakers
450
+ 432: Sandal
451
+ 433: Sandwich
452
+ 434: Saucer
453
+ 435: Saxophone
454
+ 436: Scale
455
+ 437: Scarf
456
+ 438: Scissors
457
+ 439: Scoreboard
458
+ 440: Scorpion
459
+ 441: Screwdriver
460
+ 442: Sculpture
461
+ 443: Sea lion
462
+ 444: Sea turtle
463
+ 445: Seafood
464
+ 446: Seahorse
465
+ 447: Seat belt
466
+ 448: Segway
467
+ 449: Serving tray
468
+ 450: Sewing machine
469
+ 451: Shark
470
+ 452: Sheep
471
+ 453: Shelf
472
+ 454: Shellfish
473
+ 455: Shirt
474
+ 456: Shorts
475
+ 457: Shotgun
476
+ 458: Shower
477
+ 459: Shrimp
478
+ 460: Sink
479
+ 461: Skateboard
480
+ 462: Ski
481
+ 463: Skirt
482
+ 464: Skull
483
+ 465: Skunk
484
+ 466: Skyscraper
485
+ 467: Slow cooker
486
+ 468: Snack
487
+ 469: Snail
488
+ 470: Snake
489
+ 471: Snowboard
490
+ 472: Snowman
491
+ 473: Snowmobile
492
+ 474: Snowplow
493
+ 475: Soap dispenser
494
+ 476: Sock
495
+ 477: Sofa bed
496
+ 478: Sombrero
497
+ 479: Sparrow
498
+ 480: Spatula
499
+ 481: Spice rack
500
+ 482: Spider
501
+ 483: Spoon
502
+ 484: Sports equipment
503
+ 485: Sports uniform
504
+ 486: Squash (Plant)
505
+ 487: Squid
506
+ 488: Squirrel
507
+ 489: Stairs
508
+ 490: Stapler
509
+ 491: Starfish
510
+ 492: Stationary bicycle
511
+ 493: Stethoscope
512
+ 494: Stool
513
+ 495: Stop sign
514
+ 496: Strawberry
515
+ 497: Street light
516
+ 498: Stretcher
517
+ 499: Studio couch
518
+ 500: Submarine
519
+ 501: Submarine sandwich
520
+ 502: Suit
521
+ 503: Suitcase
522
+ 504: Sun hat
523
+ 505: Sunglasses
524
+ 506: Surfboard
525
+ 507: Sushi
526
+ 508: Swan
527
+ 509: Swim cap
528
+ 510: Swimming pool
529
+ 511: Swimwear
530
+ 512: Sword
531
+ 513: Syringe
532
+ 514: Table
533
+ 515: Table tennis racket
534
+ 516: Tablet computer
535
+ 517: Tableware
536
+ 518: Taco
537
+ 519: Tank
538
+ 520: Tap
539
+ 521: Tart
540
+ 522: Taxi
541
+ 523: Tea
542
+ 524: Teapot
543
+ 525: Teddy bear
544
+ 526: Telephone
545
+ 527: Television
546
+ 528: Tennis ball
547
+ 529: Tennis racket
548
+ 530: Tent
549
+ 531: Tiara
550
+ 532: Tick
551
+ 533: Tie
552
+ 534: Tiger
553
+ 535: Tin can
554
+ 536: Tire
555
+ 537: Toaster
556
+ 538: Toilet
557
+ 539: Toilet paper
558
+ 540: Tomato
559
+ 541: Tool
560
+ 542: Toothbrush
561
+ 543: Torch
562
+ 544: Tortoise
563
+ 545: Towel
564
+ 546: Tower
565
+ 547: Toy
566
+ 548: Traffic light
567
+ 549: Traffic sign
568
+ 550: Train
569
+ 551: Training bench
570
+ 552: Treadmill
571
+ 553: Tree
572
+ 554: Tree house
573
+ 555: Tripod
574
+ 556: Trombone
575
+ 557: Trousers
576
+ 558: Truck
577
+ 559: Trumpet
578
+ 560: Turkey
579
+ 561: Turtle
580
+ 562: Umbrella
581
+ 563: Unicycle
582
+ 564: Van
583
+ 565: Vase
584
+ 566: Vegetable
585
+ 567: Vehicle
586
+ 568: Vehicle registration plate
587
+ 569: Violin
588
+ 570: Volleyball (Ball)
589
+ 571: Waffle
590
+ 572: Waffle iron
591
+ 573: Wall clock
592
+ 574: Wardrobe
593
+ 575: Washing machine
594
+ 576: Waste container
595
+ 577: Watch
596
+ 578: Watercraft
597
+ 579: Watermelon
598
+ 580: Weapon
599
+ 581: Whale
600
+ 582: Wheel
601
+ 583: Wheelchair
602
+ 584: Whisk
603
+ 585: Whiteboard
604
+ 586: Willow
605
+ 587: Window
606
+ 588: Window blind
607
+ 589: Wine
608
+ 590: Wine glass
609
+ 591: Wine rack
610
+ 592: Winter melon
611
+ 593: Wok
612
+ 594: Woman
613
+ 595: Wood-burning stove
614
+ 596: Woodpecker
615
+ 597: Worm
616
+ 598: Wrench
617
+ 599: Zebra
618
+ 600: Zucchini
619
+
620
+
621
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
622
+ download: |
623
+ from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
624
+ from ultralytics.utils.checks import check_requirements, check_version
625
+
626
+ check_requirements('fiftyone')
627
+ if is_ubuntu() and check_version(get_ubuntu_version(), '>=22.04'):
628
+ # Ubuntu>=22.04 patch https://github.com/voxel51/fiftyone/issues/2961#issuecomment-1666519347
629
+ check_requirements('fiftyone-db-ubuntu2204')
630
+
631
+ import fiftyone as fo
632
+ import fiftyone.zoo as foz
633
+ import warnings
634
+
635
+ name = 'open-images-v7'
636
+ fraction = 1.0 # fraction of full dataset to use
637
 + LOGGER.warning('WARNING ⚠️ Open Images V7 dataset requires at least 561 GB of free space. Starting download...')
638
+ for split in 'train', 'validation': # 1743042 train, 41620 val images
639
+ train = split == 'train'
640
+
641
+ # Load Open Images dataset
642
+ dataset = foz.load_zoo_dataset(name,
643
+ split=split,
644
+ label_types=['detections'],
645
+ dataset_dir=Path(SETTINGS['datasets_dir']) / 'fiftyone' / name,
646
+ max_samples=round((1743042 if train else 41620) * fraction))
647
+
648
+ # Define classes
649
+ if train:
650
+ classes = dataset.default_classes # all classes
651
+ # classes = dataset.distinct('ground_truth.detections.label') # only observed classes
652
+
653
+ # Export to YOLO format
654
+ with warnings.catch_warnings():
655
+ warnings.filterwarnings("ignore", category=UserWarning, module="fiftyone.utils.yolo")
656
+ dataset.export(export_dir=str(Path(SETTINGS['datasets_dir']) / name),
657
+ dataset_type=fo.types.YOLOv5Dataset,
658
+ label_field='ground_truth',
659
+ split='val' if split == 'validation' else split,
660
+ classes=classes,
661
+ overwrite=train)
ultralytics/cfg/datasets/tiger-pose.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Tiger Pose dataset by Ultralytics
3
+ # Example usage: yolo train data=tiger-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── tiger-pose ← downloads here (75.3 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/tiger-pose # dataset root dir
12
+ train: train # train images (relative to 'path') 210 images
13
+ val: val # val images (relative to 'path') 53 images
14
+
15
+ # Keypoints
16
+ kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
17
+ flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
18
+
19
+ # Classes
20
+ names:
21
+ 0: tiger
22
+
23
+ # Download script/URL (optional)
24
+ download: https://ultralytics.com/assets/tiger-pose.zip
ultralytics/cfg/datasets/xView.yaml ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
3
+ # -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
4
+ # Example usage: yolo train data=xView.yaml
5
+ # parent
6
+ # ├── ultralytics
7
+ # └── datasets
8
+ # └── xView ← downloads here (20.7 GB)
9
+
10
+
11
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12
+ path: ../datasets/xView # dataset root dir
13
+ train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
14
 + val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
15
+
16
+ # Classes
17
+ names:
18
+ 0: Fixed-wing Aircraft
19
+ 1: Small Aircraft
20
+ 2: Cargo Plane
21
+ 3: Helicopter
22
+ 4: Passenger Vehicle
23
+ 5: Small Car
24
+ 6: Bus
25
+ 7: Pickup Truck
26
+ 8: Utility Truck
27
+ 9: Truck
28
+ 10: Cargo Truck
29
+ 11: Truck w/Box
30
+ 12: Truck Tractor
31
+ 13: Trailer
32
+ 14: Truck w/Flatbed
33
+ 15: Truck w/Liquid
34
+ 16: Crane Truck
35
+ 17: Railway Vehicle
36
+ 18: Passenger Car
37
+ 19: Cargo Car
38
+ 20: Flat Car
39
+ 21: Tank car
40
+ 22: Locomotive
41
+ 23: Maritime Vessel
42
+ 24: Motorboat
43
+ 25: Sailboat
44
+ 26: Tugboat
45
+ 27: Barge
46
+ 28: Fishing Vessel
47
+ 29: Ferry
48
+ 30: Yacht
49
+ 31: Container Ship
50
+ 32: Oil Tanker
51
+ 33: Engineering Vehicle
52
+ 34: Tower crane
53
+ 35: Container Crane
54
+ 36: Reach Stacker
55
+ 37: Straddle Carrier
56
+ 38: Mobile Crane
57
+ 39: Dump Truck
58
+ 40: Haul Truck
59
+ 41: Scraper/Tractor
60
+ 42: Front loader/Bulldozer
61
+ 43: Excavator
62
+ 44: Cement Mixer
63
+ 45: Ground Grader
64
+ 46: Hut/Tent
65
+ 47: Shed
66
+ 48: Building
67
+ 49: Aircraft Hangar
68
+ 50: Damaged Building
69
+ 51: Facility
70
+ 52: Construction Site
71
+ 53: Vehicle Lot
72
+ 54: Helipad
73
+ 55: Storage Tank
74
+ 56: Shipping container lot
75
+ 57: Shipping Container
76
+ 58: Pylon
77
+ 59: Tower
78
+
79
+
80
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
81
+ download: |
82
+ import json
83
+ import os
84
+ from pathlib import Path
85
+
86
+ import numpy as np
87
+ from PIL import Image
88
+ from tqdm import tqdm
89
+
90
+ from ultralytics.data.utils import autosplit
91
+ from ultralytics.utils.ops import xyxy2xywhn
92
+
93
+
94
+ def convert_labels(fname=Path('xView/xView_train.geojson')):
95
+ # Convert xView geoJSON labels to YOLO format
96
+ path = fname.parent
97
+ with open(fname) as f:
98
+ print(f'Loading {fname}...')
99
+ data = json.load(f)
100
+
101
+ # Make dirs
102
+ labels = Path(path / 'labels' / 'train')
103
+ os.system(f'rm -rf {labels}')
104
+ labels.mkdir(parents=True, exist_ok=True)
105
+
106
+ # xView classes 11-94 to 0-59
107
+ xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
108
+ 12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
109
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
110
+ 47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
111
+
112
+ shapes = {}
113
+ for feature in tqdm(data['features'], desc=f'Converting {fname}'):
114
+ p = feature['properties']
115
+ if p['bounds_imcoords']:
116
+ id = p['image_id']
117
+ file = path / 'train_images' / id
118
+ if file.exists(): # 1395.tif missing
119
+ try:
120
+ box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
121
+ assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
122
+ cls = p['type_id']
123
 + cls = xview_class2index[int(cls)] # xView class to 0-59
124
+ assert 59 >= cls >= 0, f'incorrect class index {cls}'
125
+
126
+ # Write YOLO label
127
+ if id not in shapes:
128
+ shapes[id] = Image.open(file).size
129
+ box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
130
+ with open((labels / id).with_suffix('.txt'), 'a') as f:
131
+ f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
132
+ except Exception as e:
133
+ print(f'WARNING: skipping one label for {file}: {e}')
134
+
135
+
136
+ # Download manually from https://challenge.xviewdataset.org
137
+ dir = Path(yaml['path']) # dataset root dir
138
+ # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
139
+ # 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
140
+ # 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
141
+ # download(urls, dir=dir)
142
+
143
+ # Convert labels
144
+ convert_labels(dir / 'xView_train.geojson')
145
+
146
+ # Move images
147
+ images = Path(dir / 'images')
148
+ images.mkdir(parents=True, exist_ok=True)
149
+ Path(dir / 'train_images').rename(dir / 'images' / 'train')
150
+ Path(dir / 'val_images').rename(dir / 'images' / 'val')
151
+
152
+ # Split
153
+ autosplit(dir / 'images' / 'train')
ultralytics/cfg/default.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default training settings and hyperparameters for medium-augmentation COCO training
3
+
4
+ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
5
+ mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
6
+
7
+ # Train settings -------------------------------------------------------------------------------------------------------
8
+ model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
9
+ data: # (str, optional) path to data file, i.e. coco128.yaml
10
+ epochs: 100 # (int) number of epochs to train for
11
+ patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
12
+ batch: 16 # (int) number of images per batch (-1 for AutoBatch)
13
+ imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
14
+ save: True # (bool) save train checkpoints and predict results
15
+ save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
16
+ cache: False # (bool) True/ram, disk or False. Use cache for data loading
17
+ device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
18
+ workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
19
+ project: # (str, optional) project name
20
+ name: # (str, optional) experiment name, results saved to 'project/name' directory
21
+ exist_ok: False # (bool) whether to overwrite existing experiment
22
+ pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
23
+ optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
24
+ verbose: True # (bool) whether to print verbose output
25
+ seed: 0 # (int) random seed for reproducibility
26
+ deterministic: True # (bool) whether to enable deterministic mode
27
+ single_cls: False # (bool) train multi-class data as single-class
28
+ rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
29
+ cos_lr: False # (bool) use cosine learning rate scheduler
30
+ close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
31
+ resume: False # (bool) resume training from last checkpoint
32
+ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
33
+ fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
34
+ profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
35
+ freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
36
+ # Segmentation
37
+ overlap_mask: True # (bool) masks should overlap during training (segment train only)
38
+ mask_ratio: 4 # (int) mask downsample ratio (segment train only)
39
+ # Classification
40
+ dropout: 0.0 # (float) use dropout regularization (classify train only)
41
+
42
+ # Val/Test settings ----------------------------------------------------------------------------------------------------
43
+ val: True # (bool) validate/test during training
44
+ split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
45
+ save_json: False # (bool) save results to JSON file
46
+ save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
47
+ conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
48
+ iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
49
+ max_det: 300 # (int) maximum number of detections per image
50
+ half: False # (bool) use half precision (FP16)
51
+ dnn: False # (bool) use OpenCV DNN for ONNX inference
52
+ plots: True # (bool) save plots and images during train/val
53
+
54
+ # Predict settings -----------------------------------------------------------------------------------------------------
55
+ source: # (str, optional) source directory for images or videos
56
+ vid_stride: 1 # (int) video frame-rate stride
57
+ stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
58
+ visualize: False # (bool) visualize model features
59
+ augment: False # (bool) apply image augmentation to prediction sources
60
+ agnostic_nms: False # (bool) class-agnostic NMS
61
+ classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
62
+ retina_masks: False # (bool) use high-resolution segmentation masks
63
+
64
+ # Visualize settings ---------------------------------------------------------------------------------------------------
65
+ show: False # (bool) show predicted images and videos if environment allows
66
+ save_frames: False # (bool) save predicted individual video frames
67
+ save_txt: False # (bool) save results as .txt file
68
+ save_conf: False # (bool) save results with confidence scores
69
+ save_crop: False # (bool) save cropped images with results
70
+ show_labels: True # (bool) show prediction labels, i.e. 'person'
71
+ show_conf: True # (bool) show prediction confidence, i.e. '0.99'
72
+ show_boxes: True # (bool) show prediction boxes
73
+ line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
74
+
75
+ # Export settings ------------------------------------------------------------------------------------------------------
76
+ format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
77
 + keras: False # (bool) use Keras
78
+ optimize: False # (bool) TorchScript: optimize for mobile
79
+ int8: False # (bool) CoreML/TF INT8 quantization
80
+ dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
81
+ simplify: False # (bool) ONNX: simplify model
82
+ opset: # (int, optional) ONNX: opset version
83
+ workspace: 4 # (int) TensorRT: workspace size (GB)
84
+ nms: False # (bool) CoreML: add NMS
85
+
86
+ # Hyperparameters ------------------------------------------------------------------------------------------------------
87
+ lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
88
+ lrf: 0.01 # (float) final learning rate (lr0 * lrf)
89
+ momentum: 0.937 # (float) SGD momentum/Adam beta1
90
+ weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
91
+ warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
92
+ warmup_momentum: 0.8 # (float) warmup initial momentum
93
+ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
94
+ box: 7.5 # (float) box loss gain
95
+ cls: 0.5 # (float) cls loss gain (scale with pixels)
96
+ dfl: 1.5 # (float) dfl loss gain
97
+ pose: 12.0 # (float) pose loss gain
98
+ kobj: 1.0 # (float) keypoint obj loss gain
99
+ label_smoothing: 0.0 # (float) label smoothing (fraction)
100
+ nbs: 64 # (int) nominal batch size
101
+ hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
102
+ hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
103
+ hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
104
+ degrees: 0.0 # (float) image rotation (+/- deg)
105
+ translate: 0.1 # (float) image translation (+/- fraction)
106
+ scale: 0.5 # (float) image scale (+/- gain)
107
+ shear: 0.0 # (float) image shear (+/- deg)
108
+ perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
109
+ flipud: 0.0 # (float) image flip up-down (probability)
110
+ fliplr: 0.5 # (float) image flip left-right (probability)
111
+ mosaic: 1.0 # (float) image mosaic (probability)
112
+ mixup: 0.0 # (float) image mixup (probability)
113
+ copy_paste: 0.0 # (float) segment copy-paste (probability)
114
+
115
+ # Custom config.yaml ---------------------------------------------------------------------------------------------------
116
+ cfg: # (str, optional) for overriding defaults.yaml
117
+
118
+ # Tracker settings ------------------------------------------------------------------------------------------------------
119
+ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
ultralytics/cfg/models/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Models
2
+
3
+ Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.
4
+
5
+ These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.
6
+
7
+ To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
8
+
9
+ ### Usage
10
+
11
+ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
12
+
13
+ ```bash
14
+ yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
15
+ ```
16
+
17
+ They may also be used directly in a Python environment, and accepts the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:
18
+
19
+ ```python
20
+ from ultralytics import YOLO
21
+
22
+ model = YOLO("model.yaml") # build a YOLOv8n model from scratch
23
+ # YOLO("model.pt") use pre-trained model if available
24
+ model.info() # display model information
25
+ model.train(data="coco128.yaml", epochs=100) # train the model
26
+ ```
27
+
28
+ ## Pre-trained Model Architectures
29
+
30
+ Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.
31
+
32
+ ## Contribute New Models
33
+
34
+ Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.
35
+
36
+ By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.
37
+
38
+ To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!
39
+
40
+ Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 48]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [48, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [96, 512, 3]] # stage 2
17
+
18
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
19
+ - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
20
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
21
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
22
+
23
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
24
+ - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
25
+
26
+ head:
27
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
28
+ - [-1, 1, AIFI, [1024, 8]]
29
+ - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
33
+ - [[-2, -1], 1, Concat, [1]]
34
+ - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
35
+ - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
38
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
39
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
41
+
42
+ - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
43
+ - [[-1, 17], 1, Concat, [1]] # cat Y4
44
+ - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
47
+ - [[-1, 12], 1, Concat, [1]] # cat Y5
48
+ - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
49
+
50
+ - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet101 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet50 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ x: [1.00, 1.00, 2048]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 64]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [64, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [128, 512, 3]]
17
+ - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
18
+
19
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
20
+ - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
21
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
22
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
23
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
24
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
25
+
26
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
27
+ - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
28
+ - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
29
+
30
+ head:
31
+ - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
32
+ - [-1, 1, AIFI, [2048, 8]]
33
+ - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
37
+ - [[-2, -1], 1, Concat, [1]]
38
+ - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
39
+ - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
42
+ - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
43
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
44
+ - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
45
+
46
+ - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
47
+ - [[-1, 21], 1, Concat, [1]] # cat Y4
48
+ - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
49
+
50
+ - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
51
+ - [[-1, 16], 1, Concat, [1]] # cat Y5
52
+ - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
53
+
54
+ - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v3/yolov3-spp.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [32, 3, 1]], # 0
13
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
14
+ [-1, 1, Bottleneck, [64]],
15
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
16
+ [-1, 2, Bottleneck, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
18
+ [-1, 8, Bottleneck, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
20
+ [-1, 8, Bottleneck, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
22
+ [-1, 4, Bottleneck, [1024]], # 10
23
+ ]
24
+
25
+ # YOLOv3-SPP head
26
+ head:
27
+ [[-1, 1, Bottleneck, [1024, False]],
28
+ [-1, 1, SPP, [512, [5, 9, 13]]],
29
+ [-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Bottleneck, [512, False]],
37
+ [-1, 1, Bottleneck, [512, False]],
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
40
+
41
+ [-2, 1, Conv, [128, 1, 1]],
42
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
43
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
44
+ [-1, 1, Bottleneck, [256, False]],
45
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
46
+
47
+ [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
48
+ ]
ultralytics/cfg/models/v3/yolov3-tiny.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # YOLOv3-tiny backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [16, 3, 1]], # 0
13
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
14
+ [-1, 1, Conv, [32, 3, 1]],
15
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
16
+ [-1, 1, Conv, [64, 3, 1]],
17
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
18
+ [-1, 1, Conv, [128, 3, 1]],
19
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
20
+ [-1, 1, Conv, [256, 3, 1]],
21
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
22
+ [-1, 1, Conv, [512, 3, 1]],
23
+ [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
24
+ [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
25
+ ]
26
+
27
+ # YOLOv3-tiny head
28
+ head:
29
+ [[-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [256, 1, 1]],
31
+ [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
37
+
38
+ [[19, 15], 1, Detect, [nc]], # Detect(P4, P5)
39
+ ]
ultralytics/cfg/models/v3/yolov3.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [32, 3, 1]], # 0
13
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
14
+ [-1, 1, Bottleneck, [64]],
15
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
16
+ [-1, 2, Bottleneck, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
18
+ [-1, 8, Bottleneck, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
20
+ [-1, 8, Bottleneck, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
22
+ [-1, 4, Bottleneck, [1024]], # 10
23
+ ]
24
+
25
+ # YOLOv3 head
26
+ head:
27
+ [[-1, 1, Bottleneck, [1024, False]],
28
+ [-1, 1, Conv, [512, 1, 1]],
29
+ [-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Bottleneck, [512, False]],
37
+ [-1, 1, Bottleneck, [512, False]],
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
40
+
41
+ [-2, 1, Conv, [128, 1, 1]],
42
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
43
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
44
+ [-1, 1, Bottleneck, [256, False]],
45
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
46
+
47
+ [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
48
+ ]
ultralytics/cfg/models/v5/yolov5-p6.yaml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head:
33
+ [[-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc]], # Detect(P3, P4, P5, P6)
61
+ ]
ultralytics/cfg/models/v5/yolov5.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [1024]],
26
+ [-1, 1, SPPF, [1024, 5]], # 9
27
+ ]
28
+
29
+ # YOLOv5 v6.0 head
30
+ head:
31
+ [[-1, 1, Conv, [512, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
34
+ [-1, 3, C3, [512, False]], # 13
35
+
36
+ [-1, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
39
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
40
+
41
+ [-1, 1, Conv, [256, 3, 2]],
42
+ [[-1, 14], 1, Concat, [1]], # cat head P4
43
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
44
+
45
+ [-1, 1, Conv, [512, 3, 2]],
46
+ [[-1, 10], 1, Concat, [1]], # cat head P5
47
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
48
+
49
+ [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
50
+ ]
ultralytics/cfg/models/v6/yolov6.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ activation: nn.ReLU() # (optional) model default activation function
7
+ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv6-3.0s backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 6, Conv, [128, 3, 1]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 12, Conv, [256, 3, 1]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 18, Conv, [512, 3, 1]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 6, Conv, [1024, 3, 1]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv6-3.0s head
30
+ head:
31
+ - [-1, 1, Conv, [256, 1, 1]]
32
+ - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 1, Conv, [256, 3, 1]]
35
+ - [-1, 9, Conv, [256, 3, 1]] # 14
36
+
37
+ - [-1, 1, Conv, [128, 1, 1]]
38
+ - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
39
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
40
+ - [-1, 1, Conv, [128, 3, 1]]
41
+ - [-1, 9, Conv, [128, 3, 1]] # 19
42
+
43
+ - [-1, 1, Conv, [128, 3, 2]]
44
+ - [[-1, 15], 1, Concat, [1]] # cat head P4
45
+ - [-1, 1, Conv, [256, 3, 1]]
46
+ - [-1, 9, Conv, [256, 3, 1]] # 23
47
+
48
+ - [-1, 1, Conv, [256, 3, 2]]
49
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
50
+ - [-1, 1, Conv, [512, 3, 1]]
51
+ - [-1, 9, Conv, [512, 3, 1]] # 27
52
+
53
+ - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-cls.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+
27
+ # YOLOv8.0n head
28
+ head:
29
+ - [-1, 1, Classify, [nc]] # Classify
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-ghost-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C3Ghost, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, GhostConv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, GhostConv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, GhostConv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [768, True]]
26
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3Ghost, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0-ghost-p6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C3Ghost, [768]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C3Ghost, [512]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, GhostConv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, GhostConv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, GhostConv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-ghost.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+ # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
4
+
5
+ # Parameters
6
+ nc: 80 # number of classes
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
10
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
11
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
12
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
13
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs
14
+
15
+ # YOLOv8.0n-ghost backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C3Ghost, [128, True]]
21
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C3Ghost, [256, True]]
23
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C3Ghost, [512, True]]
25
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C3Ghost, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3Ghost, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, GhostConv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, GhostConv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, Conv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C2f, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C2f, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, Conv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C2f, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0x6 backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [768, True]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
28
+ - [-1, 3, C2f, [1024, True]]
29
+ - [-1, 1, SPPF, [1024, 5]] # 11
30
+
31
+ # YOLOv8.0x6 head
32
+ head:
33
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C2, [768, False]] # 14
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
38
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
39
+ - [-1, 3, C2, [512, False]] # 17
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
42
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
44
+
45
+ - [-1, 1, Conv, [256, 3, 2]]
46
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
47
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
48
+
49
+ - [-1, 1, Conv, [512, 3, 2]]
50
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
51
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
52
+
53
+ - [-1, 1, Conv, [768, 3, 2]]
54
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
55
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
56
+
57
+ - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-pose.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0n backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C2f, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, Conv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, Conv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-seg-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-seg.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/trackers/botsort.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
3
+
4
+ tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas (for tracker evaluation; not used for now)
11
+ # mot20: False # for tracker evaluation (not used for now)
12
+
13
+ # BoT-SORT settings
14
+ gmc_method: sparseOptFlow # method of global motion compensation
15
+ # ReID model related thresh (not supported yet)
16
+ proximity_thresh: 0.5
17
+ appearance_thresh: 0.25
18
+ with_reid: False
ultralytics/cfg/trackers/bytetrack.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
3
+
4
+ tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas (for tracker evaluation; not used for now)
11
+ # mot20: False # for tracker evaluation (not used for now)
ultralytics/data/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .base import BaseDataset
4
+ from .build import build_dataloader, build_yolo_dataset, load_inference_source
5
+ from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
6
+
7
+ __all__ = ('BaseDataset', 'ClassificationDataset', 'SemanticDataset', 'YOLODataset', 'build_yolo_dataset',
8
+ 'build_dataloader', 'load_inference_source')
ultralytics/data/annotator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from pathlib import Path
4
+
5
+ from ultralytics import SAM, YOLO
6
+
7
+
8
+ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
9
+ """
10
+ Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
11
+
12
+ Args:
13
+ data (str): Path to a folder containing images to be annotated.
14
+ det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
15
+ sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
16
+ device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
17
+ output_dir (str | None | optional): Directory to save the annotated results.
18
+ Defaults to a 'labels' folder in the same directory as 'data'.
19
+
20
+ Example:
21
+ ```python
22
+ from ultralytics.data.annotator import auto_annotate
23
+
24
+ auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
25
+ ```
26
+ """
27
+ det_model = YOLO(det_model)
28
+ sam_model = SAM(sam_model)
29
+
30
+ data = Path(data)
31
+ if not output_dir:
32
+ output_dir = data.parent / f'{data.stem}_auto_annotate_labels'
33
+ Path(output_dir).mkdir(exist_ok=True, parents=True)
34
+
35
+ det_results = det_model(data, stream=True, device=device)
36
+
37
+ for result in det_results:
38
+ class_ids = result.boxes.cls.int().tolist() # noqa
39
+ if len(class_ids):
40
+ boxes = result.boxes.xyxy # Boxes object for bbox outputs
41
+ sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
42
+ segments = sam_results[0].masks.xyn # noqa
43
+
44
+ with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
45
+ for i in range(len(segments)):
46
+ s = segments[i]
47
+ if len(s) == 0:
48
+ continue
49
+ segment = map(str, segments[i].reshape(-1).tolist())
50
+ f.write(f'{class_ids[i]} ' + ' '.join(segment) + '\n')
ultralytics/data/augment.py ADDED
@@ -0,0 +1,1107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import math
4
+ import random
5
+ from copy import deepcopy
6
+
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ import torchvision.transforms as T
11
+
12
+ from ultralytics.utils import LOGGER, colorstr
13
+ from ultralytics.utils.checks import check_version
14
+ from ultralytics.utils.instance import Instances
15
+ from ultralytics.utils.metrics import bbox_ioa
16
+ from ultralytics.utils.ops import segment2box
17
+
18
+ from .utils import polygons2masks, polygons2masks_overlap
19
+
20
+
21
+ # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
22
+ class BaseTransform:
23
+ """
24
+ Base class for image transformations.
25
+
26
+ This is a generic transformation class that can be extended for specific image processing needs.
27
+ The class is designed to be compatible with both classification and semantic segmentation tasks.
28
+
29
+ Methods:
30
+ __init__: Initializes the BaseTransform object.
31
+ apply_image: Applies image transformation to labels.
32
+ apply_instances: Applies transformations to object instances in labels.
33
+ apply_semantic: Applies semantic segmentation to an image.
34
+ __call__: Applies all label transformations to an image, instances, and semantic masks.
35
+ """
36
+
37
+ def __init__(self) -> None:
38
+ """Initializes the BaseTransform object."""
39
+ pass
40
+
41
+ def apply_image(self, labels):
42
+ """Applies image transformations to labels."""
43
+ pass
44
+
45
+ def apply_instances(self, labels):
46
+ """Applies transformations to object instances in labels."""
47
+ pass
48
+
49
+ def apply_semantic(self, labels):
50
+ """Applies semantic segmentation to an image."""
51
+ pass
52
+
53
+ def __call__(self, labels):
54
+ """Applies all label transformations to an image, instances, and semantic masks."""
55
+ self.apply_image(labels)
56
+ self.apply_instances(labels)
57
+ self.apply_semantic(labels)
58
+
59
+
60
+ class Compose:
61
+ """Class for composing multiple image transformations."""
62
+
63
+ def __init__(self, transforms):
64
+ """Initializes the Compose object with a list of transforms."""
65
+ self.transforms = transforms
66
+
67
+ def __call__(self, data):
68
+ """Applies a series of transformations to input data."""
69
+ for t in self.transforms:
70
+ data = t(data)
71
+ return data
72
+
73
+ def append(self, transform):
74
+ """Appends a new transform to the existing list of transforms."""
75
+ self.transforms.append(transform)
76
+
77
+ def tolist(self):
78
+ """Converts the list of transforms to a standard Python list."""
79
+ return self.transforms
80
+
81
+ def __repr__(self):
82
+ """Returns a string representation of the object."""
83
+ return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
84
+
85
+
86
+ class BaseMixTransform:
87
+ """
88
+ Class for base mix (MixUp/Mosaic) transformations.
89
+
90
+ This implementation is from mmyolo.
91
+ """
92
+
93
+ def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
94
+ """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
95
+ self.dataset = dataset
96
+ self.pre_transform = pre_transform
97
+ self.p = p
98
+
99
+ def __call__(self, labels):
100
+ """Applies pre-processing transforms and mixup/mosaic transforms to labels data."""
101
+ if random.uniform(0, 1) > self.p:
102
+ return labels
103
+
104
+ # Get index of one or three other images
105
+ indexes = self.get_indexes()
106
+ if isinstance(indexes, int):
107
+ indexes = [indexes]
108
+
109
+ # Get images information will be used for Mosaic or MixUp
110
+ mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
111
+
112
+ if self.pre_transform is not None:
113
+ for i, data in enumerate(mix_labels):
114
+ mix_labels[i] = self.pre_transform(data)
115
+ labels['mix_labels'] = mix_labels
116
+
117
+ # Mosaic or MixUp
118
+ labels = self._mix_transform(labels)
119
+ labels.pop('mix_labels', None)
120
+ return labels
121
+
122
+ def _mix_transform(self, labels):
123
+ """Applies MixUp or Mosaic augmentation to the label dictionary."""
124
+ raise NotImplementedError
125
+
126
+ def get_indexes(self):
127
+ """Gets a list of shuffled indexes for mosaic augmentation."""
128
+ raise NotImplementedError
129
+
130
+
131
+ class Mosaic(BaseMixTransform):
132
+ """
133
+ Mosaic augmentation.
134
+
135
+ This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
136
+ The augmentation is applied to a dataset with a given probability.
137
+
138
+ Attributes:
139
+ dataset: The dataset on which the mosaic augmentation is applied.
140
+ imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640.
141
+ p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0.
142
+ n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3).
143
+ """
144
+
145
+ def __init__(self, dataset, imgsz=640, p=1.0, n=4):
146
+ """Initializes the object with a dataset, image size, probability, and border."""
147
+ assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
148
+ assert n in (4, 9), 'grid must be equal to 4 or 9.'
149
+ super().__init__(dataset=dataset, p=p)
150
+ self.dataset = dataset
151
+ self.imgsz = imgsz
152
+ self.border = (-imgsz // 2, -imgsz // 2) # width, height
153
+ self.n = n
154
+
155
+ def get_indexes(self, buffer=True):
156
+ """Return a list of random indexes from the dataset."""
157
+ if buffer: # select images from buffer
158
+ return random.choices(list(self.dataset.buffer), k=self.n - 1)
159
+ else: # select any images
160
+ return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
161
+
162
+ def _mix_transform(self, labels):
163
+ """Apply mosaic transformation to the input image and labels."""
164
+ assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.'
165
+ assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.'
166
+ return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)
167
+
168
+ def _mosaic4(self, labels):
169
+ """Create a 2x2 image mosaic."""
170
+ mosaic_labels = []
171
+ s = self.imgsz
172
+ yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y
173
+ for i in range(4):
174
+ labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
175
+ # Load image
176
+ img = labels_patch['img']
177
+ h, w = labels_patch.pop('resized_shape')
178
+
179
+ # Place img in img4
180
+ if i == 0: # top left
181
+ img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
182
+ x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
183
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
184
+ elif i == 1: # top right
185
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
186
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
187
+ elif i == 2: # bottom left
188
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
189
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
190
+ elif i == 3: # bottom right
191
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
192
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
193
+
194
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
195
+ padw = x1a - x1b
196
+ padh = y1a - y1b
197
+
198
+ labels_patch = self._update_labels(labels_patch, padw, padh)
199
+ mosaic_labels.append(labels_patch)
200
+ final_labels = self._cat_labels(mosaic_labels)
201
+ final_labels['img'] = img4
202
+ return final_labels
203
+
204
+ def _mosaic9(self, labels):
205
+ """Create a 3x3 image mosaic."""
206
+ mosaic_labels = []
207
+ s = self.imgsz
208
+ hp, wp = -1, -1 # height, width previous
209
+ for i in range(9):
210
+ labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
211
+ # Load image
212
+ img = labels_patch['img']
213
+ h, w = labels_patch.pop('resized_shape')
214
+
215
+ # Place img in img9
216
+ if i == 0: # center
217
+ img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 9 tiles
218
+ h0, w0 = h, w
219
+ c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
220
+ elif i == 1: # top
221
+ c = s, s - h, s + w, s
222
+ elif i == 2: # top right
223
+ c = s + wp, s - h, s + wp + w, s
224
+ elif i == 3: # right
225
+ c = s + w0, s, s + w0 + w, s + h
226
+ elif i == 4: # bottom right
227
+ c = s + w0, s + hp, s + w0 + w, s + hp + h
228
+ elif i == 5: # bottom
229
+ c = s + w0 - w, s + h0, s + w0, s + h0 + h
230
+ elif i == 6: # bottom left
231
+ c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
232
+ elif i == 7: # left
233
+ c = s - w, s + h0 - h, s, s + h0
234
+ elif i == 8: # top left
235
+ c = s - w, s + h0 - hp - h, s, s + h0 - hp
236
+
237
+ padw, padh = c[:2]
238
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
239
+
240
+ # Image
241
+ img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:] # img9[ymin:ymax, xmin:xmax]
242
+ hp, wp = h, w # height, width previous for next iteration
243
+
244
+ # Labels assuming imgsz*2 mosaic size
245
+ labels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1])
246
+ mosaic_labels.append(labels_patch)
247
+ final_labels = self._cat_labels(mosaic_labels)
248
+
249
+ final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]]
250
+ return final_labels
251
+
252
+ @staticmethod
253
+ def _update_labels(labels, padw, padh):
254
+ """Update labels."""
255
+ nh, nw = labels['img'].shape[:2]
256
+ labels['instances'].convert_bbox(format='xyxy')
257
+ labels['instances'].denormalize(nw, nh)
258
+ labels['instances'].add_padding(padw, padh)
259
+ return labels
260
+
261
+ def _cat_labels(self, mosaic_labels):
262
+ """Return labels with mosaic border instances clipped."""
263
+ if len(mosaic_labels) == 0:
264
+ return {}
265
+ cls = []
266
+ instances = []
267
+ imgsz = self.imgsz * 2 # mosaic imgsz
268
+ for labels in mosaic_labels:
269
+ cls.append(labels['cls'])
270
+ instances.append(labels['instances'])
271
+ final_labels = {
272
+ 'im_file': mosaic_labels[0]['im_file'],
273
+ 'ori_shape': mosaic_labels[0]['ori_shape'],
274
+ 'resized_shape': (imgsz, imgsz),
275
+ 'cls': np.concatenate(cls, 0),
276
+ 'instances': Instances.concatenate(instances, axis=0),
277
+ 'mosaic_border': self.border} # final_labels
278
+ final_labels['instances'].clip(imgsz, imgsz)
279
+ good = final_labels['instances'].remove_zero_area_boxes()
280
+ final_labels['cls'] = final_labels['cls'][good]
281
+ return final_labels
282
+
283
+
284
+ class MixUp(BaseMixTransform):
285
+ """Class for applying MixUp augmentation to the dataset."""
286
+
287
+ def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
288
+ """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
289
+ super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
290
+
291
+ def get_indexes(self):
292
+ """Get a random index from the dataset."""
293
+ return random.randint(0, len(self.dataset) - 1)
294
+
295
+ def _mix_transform(self, labels):
296
+ """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
297
+ r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
298
+ labels2 = labels['mix_labels'][0]
299
+ labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
300
+ labels['instances'] = Instances.concatenate([labels['instances'], labels2['instances']], axis=0)
301
+ labels['cls'] = np.concatenate([labels['cls'], labels2['cls']], 0)
302
+ return labels
303
+
304
+
305
+ class RandomPerspective:
306
+ """
307
+ Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
308
+ keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
309
+ option to apply these transformations conditionally with a specified probability.
310
+
311
+ Attributes:
312
+ degrees (float): Degree range for random rotations.
313
+ translate (float): Fraction of total width and height for random translation.
314
+ scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
315
+ shear (float): Shear intensity (angle in degrees).
316
+ perspective (float): Perspective distortion factor.
317
+ border (tuple): Tuple specifying mosaic border.
318
+ pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
319
+
320
+ Methods:
321
+ affine_transform(img, border): Applies a series of affine transformations to the image.
322
+ apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
323
+ apply_segments(segments, M): Transforms segments and generates new bounding boxes.
324
+ apply_keypoints(keypoints, M): Transforms keypoints.
325
+ __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
326
+ box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
327
+ """
328
+
329
+ def __init__(self,
330
+ degrees=0.0,
331
+ translate=0.1,
332
+ scale=0.5,
333
+ shear=0.0,
334
+ perspective=0.0,
335
+ border=(0, 0),
336
+ pre_transform=None):
337
+ """Initializes RandomPerspective object with transformation parameters."""
338
+
339
+ self.degrees = degrees
340
+ self.translate = translate
341
+ self.scale = scale
342
+ self.shear = shear
343
+ self.perspective = perspective
344
+ self.border = border # mosaic border
345
+ self.pre_transform = pre_transform
346
+
347
+ def affine_transform(self, img, border):
348
+ """
349
+ Applies a sequence of affine transformations centered around the image center.
350
+
351
+ Args:
352
+ img (ndarray): Input image.
353
+ border (tuple): Border dimensions.
354
+
355
+ Returns:
356
+ img (ndarray): Transformed image.
357
+ M (ndarray): Transformation matrix.
358
+ s (float): Scale factor.
359
+ """
360
+
361
+ # Center
362
+ C = np.eye(3, dtype=np.float32)
363
+
364
+ C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
365
+ C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
366
+
367
+ # Perspective
368
+ P = np.eye(3, dtype=np.float32)
369
+ P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y)
370
+ P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x)
371
+
372
+ # Rotation and Scale
373
+ R = np.eye(3, dtype=np.float32)
374
+ a = random.uniform(-self.degrees, self.degrees)
375
+ # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
376
+ s = random.uniform(1 - self.scale, 1 + self.scale)
377
+ # s = 2 ** random.uniform(-scale, scale)
378
+ R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
379
+
380
+ # Shear
381
+ S = np.eye(3, dtype=np.float32)
382
+ S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg)
383
+ S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg)
384
+
385
+ # Translation
386
+ T = np.eye(3, dtype=np.float32)
387
+ T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels)
388
+ T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels)
389
+
390
+ # Combined rotation matrix
391
+ M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
392
+ # Affine image
393
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
394
+ if self.perspective:
395
+ img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114))
396
+ else: # affine
397
+ img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114))
398
+ return img, M, s
399
+
400
def apply_bboxes(self, bboxes, M):
    """
    Apply an affine/perspective transform to bounding boxes.

    Args:
        bboxes (ndarray): boxes in xyxy format, shape (num_bboxes, 4).
        M (ndarray): 3x3 transform matrix.

    Returns:
        (ndarray): transformed boxes, xyxy format, shape (num_bboxes, 4).
    """
    n = len(bboxes)
    if n == 0:
        return bboxes

    # Homogeneous coordinates for all four corners of every box
    corners = np.ones((n * 4, 3), dtype=bboxes.dtype)
    corners[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
    corners = corners @ M.T  # transform
    if self.perspective:
        pts = (corners[:, :2] / corners[:, 2:3]).reshape(n, 8)  # perspective rescale
    else:
        pts = corners[:, :2].reshape(n, 8)  # affine

    # Axis-aligned envelope of the warped corners
    xs = pts[:, [0, 2, 4, 6]]
    ys = pts[:, [1, 3, 5, 7]]
    return np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), dtype=bboxes.dtype).reshape(4, n).T
424
+
425
def apply_segments(self, segments, M):
    """
    Apply an affine/perspective transform to segments and derive new boxes from them.

    Args:
        segments (ndarray): segments, shape (num_samples, 500, 2).
        M (ndarray): 3x3 transform matrix.

    Returns:
        bboxes (ndarray): boxes recomputed from the warped segments, shape (N, 4).
        segments (ndarray): warped segments, shape (num_samples, 500, 2).
    """
    n, num = segments.shape[:2]
    if n == 0:
        return [], segments

    points = np.ones((n * num, 3), dtype=segments.dtype)
    points[:, :2] = segments.reshape(-1, 2)
    points = points @ M.T  # transform
    points = points[:, :2] / points[:, 2:3]  # homogeneous -> cartesian (no-op scale for affine)
    segments = points.reshape(n, -1, 2)
    # Re-derive a tight box from each warped polygon, clipped to the output canvas
    bboxes = np.stack([segment2box(seg, self.size[0], self.size[1]) for seg in segments], 0)
    return bboxes, segments
449
+
450
def apply_keypoints(self, keypoints, M):
    """
    Apply an affine/perspective transform to keypoints.

    Args:
        keypoints (ndarray): keypoints, shape (N, 17, 3) as (x, y, visibility).
        M (ndarray): 3x3 transform matrix.

    Returns:
        (ndarray): transformed keypoints, shape (N, 17, 3); points warped outside the
            output canvas are marked invisible.
    """
    n, nkpt = keypoints.shape[:2]
    if n == 0:
        return keypoints
    pts = np.ones((n * nkpt, 3), dtype=keypoints.dtype)
    visible = keypoints[..., 2].reshape(n * nkpt, 1)
    pts[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
    pts = pts @ M.T  # transform
    pts = pts[:, :2] / pts[:, 2:3]  # perspective rescale (divides by 1 for affine)
    # Hide keypoints that left the image canvas
    outside = (pts[:, 0] < 0) | (pts[:, 1] < 0) | (pts[:, 0] > self.size[0]) | (pts[:, 1] > self.size[1])
    visible[outside] = 0
    return np.concatenate([pts, visible], axis=-1).reshape(n, nkpt, 3)
472
+
473
def __call__(self, labels):
    """
    Apply the random perspective/affine pipeline to an image and its targets.

    Args:
        labels (dict): must contain 'img', 'cls' and an 'instances' object (bboxes and
            optionally segments/keypoints); may contain 'mosaic_border' and 'ratio_pad'.

    Returns:
        (dict): labels with the warped 'img', filtered 'instances'/'cls', and
            'resized_shape' set to the warped image shape.
    """
    # Mosaic output is already letterboxed, so only pre-transform non-mosaic samples
    if self.pre_transform and 'mosaic_border' not in labels:
        labels = self.pre_transform(labels)
    labels.pop('ratio_pad', None)  # do not need ratio pad

    img = labels['img']
    cls = labels['cls']
    instances = labels.pop('instances')
    # Make sure the coord formats are right
    instances.convert_bbox(format='xyxy')
    instances.denormalize(*img.shape[:2][::-1])

    border = labels.pop('mosaic_border', self.border)
    # Output canvas size; negative mosaic borders shrink the canvas back to imgsz
    self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
    # M is affine matrix
    # Scale for func:`box_candidates`
    img, M, scale = self.affine_transform(img, border)

    bboxes = self.apply_bboxes(instances.bboxes, M)

    segments = instances.segments
    keypoints = instances.keypoints
    # Update bboxes if there are segments (tighter boxes re-derived from warped polygons)
    if len(segments):
        bboxes, segments = self.apply_segments(segments, M)

    if keypoints is not None:
        keypoints = self.apply_keypoints(keypoints, M)
    new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False)
    # Clip
    new_instances.clip(*self.size)

    # Filter instances
    instances.scale(scale_w=scale, scale_h=scale, bbox_only=True)
    # Make the bboxes have the same scale with new_bboxes for a fair area comparison
    i = self.box_candidates(box1=instances.bboxes.T,
                            box2=new_instances.bboxes.T,
                            area_thr=0.01 if len(segments) else 0.10)
    labels['instances'] = new_instances[i]
    labels['cls'] = cls[i]
    labels['img'] = img
    labels['resized_shape'] = img.shape[:2]
    return labels
522
+
523
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
    """
    Select boxes that survive augmentation well enough to keep as training targets.

    A candidate must be at least `wh_thr` pixels in both dimensions, retain more than
    `area_thr` of its pre-augmentation area, and have an aspect ratio below `ar_thr`.

    Args:
        box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
        box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
        wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
        ar_thr (float, optional): The aspect ratio threshold. Default is 100.
        area_thr (float, optional): The area ratio threshold. Default is 0.1.
        eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.

    Returns:
        (numpy.ndarray): A boolean array indicating which boxes are candidates.
    """
    w_before, h_before = box1[2] - box1[0], box1[3] - box1[1]
    w_after, h_after = box2[2] - box2[0], box2[3] - box2[1]
    aspect = np.maximum(w_after / (h_after + eps), h_after / (w_after + eps))  # aspect ratio
    big_enough = (w_after > wh_thr) & (h_after > wh_thr)
    area_kept = w_after * h_after / (w_before * h_before + eps) > area_thr
    return big_enough & area_kept & (aspect < ar_thr)  # candidates
543
+
544
+
545
class RandomHSV:
    """
    Randomly jitters the Hue, Saturation, and Value (HSV) channels of an image.

    The magnitude of each perturbation is drawn uniformly within the limits set by
    `hgain`, `sgain`, and `vgain`.
    """

    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
        """
        Initialize RandomHSV with per-channel gain limits.

        Args:
            hgain (float, optional): Maximum variation for hue. Default is 0.5.
            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
            vgain (float, optional): Maximum variation for value. Default is 0.5.
        """
        self.hgain = hgain
        self.sgain = sgain
        self.vgain = vgain

    def __call__(self, labels):
        """
        Apply a random HSV shift to labels['img'] in place and return labels.

        A no-op when all three gains are zero.
        """
        img = labels['img']
        if self.hgain or self.sgain or self.vgain:
            gains = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
            hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
            dtype = img.dtype  # uint8

            x = np.arange(0, 256, dtype=gains.dtype)
            lut_hue = ((x * gains[0]) % 180).astype(dtype)  # OpenCV 8-bit hue range is [0, 180)
            lut_sat = np.clip(x * gains[1], 0, 255).astype(dtype)
            lut_val = np.clip(x * gains[2], 0, 255).astype(dtype)

            im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
            cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img)  # writes back into img, no return needed
        return labels
586
+
587
+
588
class RandomFlip:
    """
    Flips an image horizontally or vertically with a given probability.

    Any attached instances (bounding boxes, keypoints, etc.) are updated to match.
    """

    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
        """
        Initialize RandomFlip.

        Args:
            p (float, optional): Probability of flipping, in [0, 1]. Default is 0.5.
            direction (str, optional): 'horizontal' or 'vertical'. Default is 'horizontal'.
            flip_idx (array-like, optional): Keypoint index remapping applied on horizontal flips.
        """
        assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
        assert 0 <= p <= 1.0

        self.p = p
        self.direction = direction
        self.flip_idx = flip_idx

    def __call__(self, labels):
        """
        Possibly flip labels['img'] and update labels['instances'] accordingly.

        Args:
            labels (dict): contains 'img' (the image) and 'instances' (boxes and
                optionally keypoints).

        Returns:
            (dict): the same dict with the flipped image and updated instances.
        """
        img = labels['img']
        instances = labels.pop('instances')
        instances.convert_bbox(format='xywh')
        rows, cols = img.shape[:2]
        # Normalized coords mirror around 1.0 rather than the pixel extent
        rows = 1 if instances.normalized else rows
        cols = 1 if instances.normalized else cols

        if self.direction == 'vertical' and random.random() < self.p:
            img = np.flipud(img)  # flip up-down
            instances.flipud(rows)
        if self.direction == 'horizontal' and random.random() < self.p:
            img = np.fliplr(img)  # flip left-right
            instances.fliplr(cols)
            # Remap left/right keypoints after a horizontal flip
            if self.flip_idx is not None and instances.keypoints is not None:
                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
        labels['img'] = np.ascontiguousarray(img)
        labels['instances'] = instances
        return labels
643
+
644
+
645
class LetterBox:
    """Resize image and padding for detection, instance segmentation, pose."""

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
        """
        Initialize LetterBox object with specific parameters.

        Args:
            new_shape (tuple | int): target (height, width) after letterboxing.
            auto (bool): if True, pad only to the nearest `stride` multiple (minimum rectangle).
            scaleFill (bool): if True, stretch to `new_shape` with no padding.
            scaleup (bool): if False, only downscale (better val mAP).
            center (bool): if True, center the image; otherwise place it top-left.
            stride (int): model stride, used when `auto` is True.
        """
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride
        self.center = center  # Put the image in the middle or top-left

    def __call__(self, labels=None, image=None):
        """
        Return updated labels and image with added border.

        Can be called either with a `labels` dict (returns the updated dict) or with a
        bare `image` / empty labels (returns just the letterboxed image).
        """
        if labels is None:
            labels = {}
        img = labels.get('img') if image is None else image
        shape = img.shape[:2]  # current shape [height, width]
        # Rect-training shape takes priority over the configured one
        new_shape = labels.pop('rect_shape', self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        if self.center:
            dw /= 2  # divide padding into 2 sides
            dh /= 2

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # -0.1/+0.1 rounding splits an odd padding total consistently between the sides
        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))  # add border
        if labels.get('ratio_pad'):
            labels['ratio_pad'] = (labels['ratio_pad'], (left, top))  # for evaluation

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels['img'] = img
            labels['resized_shape'] = new_shape
            return labels
        else:
            return img

    def _update_labels(self, labels, ratio, padw, padh):
        """Scale instances to the letterboxed image and offset them by the padding."""
        labels['instances'].convert_bbox(format='xyxy')
        labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
        labels['instances'].scale(*ratio)
        labels['instances'].add_padding(padw, padh)
        return labels
711
+
712
+
713
class CopyPaste:
    """
    Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
    responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
    """

    def __init__(self, p=0.5) -> None:
        """
        Initializes the CopyPaste class with a given probability.

        Args:
            p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
                Default is 0.5.
        """
        self.p = p

    def __call__(self, labels):
        """
        Applies the Copy-Paste augmentation to the given image and instances.

        Args:
            labels (dict): A dictionary containing:
                - 'img': The image to augment.
                - 'cls': Class labels associated with the instances.
                - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.

        Returns:
            (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.

        Notes:
            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
            2. This method modifies the input dictionary 'labels' in place.
        """
        im = labels['img']
        cls = labels['cls']
        h, w = im.shape[:2]
        instances = labels.pop('instances')
        instances.convert_bbox(format='xyxy')
        instances.denormalize(w, h)
        if self.p and len(instances.segments):
            n = len(instances)
            _, w, _ = im.shape  # height, width, channels
            im_new = np.zeros(im.shape, np.uint8)  # paste mask, drawn in mirrored orientation

            # Calculate ioa first then select indexes randomly
            ins_flip = deepcopy(instances)
            ins_flip.fliplr(w)

            ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes)  # intersection over area, (N, M)
            # Keep only mirrored instances that overlap every existing object < 30%
            indexes = np.nonzero((ioa < 0.30).all(1))[0]  # (N, )
            n = len(indexes)
            # Paste a random subset of the candidates; expected fraction is self.p
            for j in random.sample(list(indexes), k=round(self.p * n)):
                cls = np.concatenate((cls, cls[[j]]), axis=0)
                instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0)
                cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED)

            result = cv2.flip(im, 1)  # augment segments (flip left-right)
            i = cv2.flip(im_new, 1).astype(bool)  # paste mask mapped back to the original orientation
            im[i] = result[i]

        labels['img'] = im
        labels['cls'] = cls
        labels['instances'] = instances
        return labels
777
+
778
+
779
class Albumentations:
    """
    Albumentations transformations.

    Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
    Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
    compression.
    """

    def __init__(self, p=1.0):
        """
        Initialize the transform object for YOLO bbox formatted params.

        Args:
            p (float): probability of applying the composed pipeline on each call.
        """
        self.p = p
        self.transform = None  # remains None if albumentations is missing or fails to load
        prefix = colorstr('albumentations: ')
        try:
            import albumentations as A

            check_version(A.__version__, '1.0.3', hard=True)  # version requirement

            # Pixel-level transforms only; boxes pass through unchanged
            T = [
                A.Blur(p=0.01),
                A.MedianBlur(p=0.01),
                A.ToGray(p=0.01),
                A.CLAHE(p=0.01),
                A.RandomBrightnessContrast(p=0.0),
                A.RandomGamma(p=0.0),
                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            # Best-effort: any other failure disables the pipeline but is logged
            LOGGER.info(f'{prefix}{e}')

    def __call__(self, labels):
        """
        Generates object detections and returns a dictionary with detection results.

        Applies the composed pipeline with probability `self.p`; the update is skipped
        when the transformed image no longer contains any boxes.
        """
        im = labels['img']
        cls = labels['cls']
        if len(cls):
            labels['instances'].convert_bbox('xywh')
            labels['instances'].normalize(*im.shape[:2][::-1])
            bboxes = labels['instances'].bboxes
            # TODO: add supports of segments and keypoints
            if self.transform and random.random() < self.p:
                new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
                if len(new['class_labels']) > 0:  # skip update if no bbox in new im
                    labels['img'] = new['image']
                    labels['cls'] = np.array(new['class_labels'])
                    bboxes = np.array(new['bboxes'], dtype=np.float32)
                    labels['instances'].update(bboxes=bboxes)
        return labels
831
+
832
+
833
+ # TODO: technically this is not an augmentation, maybe we should put this to another files
834
class Format:
    """
    Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
    standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.

    Attributes:
        bbox_format (str): Format for bounding boxes. Default is 'xywh'.
        normalize (bool): Whether to normalize bounding boxes. Default is True.
        return_mask (bool): Return instance masks for segmentation. Default is False.
        return_keypoint (bool): Return keypoints for pose estimation. Default is False.
        mask_ratio (int): Downsample ratio for masks. Default is 4.
        mask_overlap (bool): Whether to overlap masks. Default is True.
        batch_idx (bool): Keep batch indexes. Default is True.
    """

    def __init__(self,
                 bbox_format='xywh',
                 normalize=True,
                 return_mask=False,
                 return_keypoint=False,
                 mask_ratio=4,
                 mask_overlap=True,
                 batch_idx=True):
        """Initializes the Format class with given parameters."""
        self.bbox_format = bbox_format
        self.normalize = normalize
        self.return_mask = return_mask  # set False when training detection only
        self.return_keypoint = return_keypoint
        self.mask_ratio = mask_ratio
        self.mask_overlap = mask_overlap
        self.batch_idx = batch_idx  # keep the batch indexes

    def __call__(self, labels):
        """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
        img = labels.pop('img')
        h, w = img.shape[:2]
        cls = labels.pop('cls')
        instances = labels.pop('instances')
        instances.convert_bbox(format=self.bbox_format)
        instances.denormalize(w, h)
        nl = len(instances)

        if self.return_mask:
            if nl:
                masks, instances, cls = self._format_segments(instances, cls, w, h)
                masks = torch.from_numpy(masks)
            else:
                # No instances: emit an empty mask tensor of the expected downsampled shape
                masks = torch.zeros(1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio,
                                    img.shape[1] // self.mask_ratio)
            labels['masks'] = masks
        if self.normalize:
            instances.normalize(w, h)
        labels['img'] = self._format_img(img)
        # Zero-length tensors keep the collated batch shape consistent when nl == 0
        labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
        labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
        if self.return_keypoint:
            labels['keypoints'] = torch.from_numpy(instances.keypoints)
        # Then we can use collate_fn
        if self.batch_idx:
            labels['batch_idx'] = torch.zeros(nl)
        return labels

    def _format_img(self, img):
        """Format the image for YOLO from Numpy array (HWC, BGR) to PyTorch tensor (CHW, RGB)."""
        if len(img.shape) < 3:
            img = np.expand_dims(img, -1)  # add a channel axis for grayscale input
        img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
        img = torch.from_numpy(img)
        return img

    def _format_segments(self, instances, cls, w, h):
        """
        Convert polygon points to bitmap masks.

        When `mask_overlap` is True, a single index mask is produced and instances/cls
        are reordered to match the mask sorting.
        """
        segments = instances.segments
        if self.mask_overlap:
            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
            masks = masks[None]  # (640, 640) -> (1, 640, 640)
            instances = instances[sorted_idx]
            cls = cls[sorted_idx]
        else:
            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio)

        return masks, instances, cls
916
+
917
+
918
def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """
    Convert images to a size suitable for YOLOv8 training.

    Builds the full training augmentation pipeline: mosaic, copy-paste and random
    perspective as a pre-transform, followed by mixup, albumentations, HSV jitter and
    random flips.

    Args:
        dataset: dataset exposing `data` config (and `use_keypoints` for pose).
        imgsz (int): target image size.
        hyp: hyperparameter namespace with augmentation settings.
        stretch (bool): if True, skip letterboxing inside RandomPerspective.
    """
    pre_transform = Compose([
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
        CopyPaste(p=hyp.copy_paste),
        RandomPerspective(
            degrees=hyp.degrees,
            translate=hyp.translate,
            scale=hyp.scale,
            shear=hyp.shear,
            perspective=hyp.perspective,
            pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
        )])
    flip_idx = dataset.data.get('flip_idx', [])  # for keypoints augmentation
    if dataset.use_keypoints:
        kpt_shape = dataset.data.get('kpt_shape', None)
        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
            # Horizontal flips would mislabel left/right keypoints without a flip_idx mapping
            hyp.fliplr = 0.0
            LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'")
        elif flip_idx and (len(flip_idx) != kpt_shape[0]):
            raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}')

    return Compose([
        pre_transform,
        MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
        Albumentations(p=1.0),
        RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
        RandomFlip(direction='vertical', p=hyp.flipud),
        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms
947
+
948
+
949
+ # Classification augmentations -----------------------------------------------------------------------------------------
950
def classify_transforms(size=224, rect=False, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)):  # IMAGENET_MEAN, IMAGENET_STD
    """
    Build the torchvision transform pipeline used when albumentations is not installed.

    Args:
        size (int): target square image size.
        rect (bool): if True, letterbox (rectangular) instead of center-cropping.
        mean (tuple): per-channel normalization mean.
        std (tuple): per-channel normalization std; normalization is skipped only when
            both mean and std are all zeros.

    Returns:
        (T.Compose): composed classification transforms.

    Raises:
        TypeError: if `size` is not an int.
    """
    if not isinstance(size, int):
        raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)')
    crop = ClassifyLetterBox(size, auto=True) if rect else CenterCrop(size)
    steps = [crop, ToTensor()]
    if any(mean) or any(std):
        steps.append(T.Normalize(mean, std, inplace=True))
    return T.Compose(steps)
958
+
959
+
960
def hsv2colorjitter(h, s, v):
    """
    Map HSV jitter magnitudes (hue, saturation, value) onto torchvision ColorJitter
    arguments (brightness, contrast, saturation, hue).

    Value drives both brightness and contrast; saturation and hue pass through.
    """
    brightness = contrast = v
    return brightness, contrast, s, h
963
+
964
+
965
def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        hflip=0.5,
        vflip=0.0,
        hsv_h=0.015,  # image HSV-Hue augmentation (fraction)
        hsv_s=0.7,  # image HSV-Saturation augmentation (fraction)
        hsv_v=0.4,  # image HSV-Value augmentation (fraction)
        mean=(0.0, 0.0, 0.0),  # IMAGENET_MEAN
        std=(1.0, 1.0, 1.0),  # IMAGENET_STD
        auto_aug=False,
):
    """
    YOLOv8 classification Albumentations (optional, only used if package is installed).

    Returns an `A.Compose` pipeline, or None when albumentations is not installed or
    fails to initialize (failures are logged, not raised).
    """
    prefix = colorstr('albumentations: ')
    try:
        import albumentations as A
        from albumentations.pytorch import ToTensorV2

        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
        if augment:  # Resize and crop
            T = [A.RandomResizedCrop(height=size, width=size, scale=scale)]
            if auto_aug:
                # TODO: implement AugMix, AutoAug & RandAug in albumentations
                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
            else:
                if hflip > 0:
                    T += [A.HorizontalFlip(p=hflip)]
                if vflip > 0:
                    T += [A.VerticalFlip(p=vflip)]
                if any((hsv_h, hsv_s, hsv_v)):
                    T += [A.ColorJitter(*hsv2colorjitter(hsv_h, hsv_s, hsv_v))]  # brightness, contrast, saturation, hue
        else:  # Use fixed crop for eval set (reproducibility)
            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
        return A.Compose(T)

    except ImportError:  # package not installed, skip
        pass
    except Exception as e:
        LOGGER.info(f'{prefix}{e}')
1007
+
1008
+
1009
class ClassifyLetterBox:
    """
    YOLOv8 LetterBox class for classification preprocessing, designed to be part of a
    transformation pipeline, e.g. T.Compose([LetterBox(size), ToTensor()]).

    Attributes:
        h (int): Target height of the image.
        w (int): Target width of the image.
        auto (bool): If True, automatically solves for short side using stride.
        stride (int): The stride value, used when 'auto' is True.
    """

    def __init__(self, size=(640, 640), auto=False, stride=32):
        """
        Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.

        Args:
            size (Union[int, Tuple[int, int]]): The target dimensions (height, width); an int means a square.
            auto (bool): If True, automatically calculates the short side based on stride.
            stride (int): The stride value, used when 'auto' is True.
        """
        super().__init__()
        if isinstance(size, int):
            self.h = self.w = size
        else:
            self.h, self.w = size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

    def __call__(self, im):
        """
        Resize `im` preserving aspect ratio and pad it to the target size with gray (114).

        Args:
            im (numpy.ndarray): The input image as a numpy array of shape HWC.

        Returns:
            (numpy.ndarray): The letterboxed and resized image as a numpy array.
        """
        imh, imw = im.shape[:2]
        r = min(self.h / imh, self.w / imw)  # ratio of new/old dimensions
        new_h, new_w = round(imh * r), round(imw * r)  # resized image dimensions

        # Padded canvas dimensions
        if self.auto:
            canvas_h = math.ceil(new_h / self.stride) * self.stride
            canvas_w = math.ceil(new_w / self.stride) * self.stride
        else:
            canvas_h, canvas_w = self.h, self.w
        top = round((canvas_h - new_h) / 2 - 0.1)
        left = round((canvas_w - new_w) / 2 - 0.1)

        # Paste the resized image onto a gray canvas
        im_out = np.full((canvas_h, canvas_w, 3), 114, dtype=im.dtype)
        im_out[top:top + new_h, left:left + new_w] = cv2.resize(im, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        return im_out
1057
+
1058
+
1059
class CenterCrop:
    """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
    T.Compose([CenterCrop(size), ToTensor()]).
    """

    def __init__(self, size=640):
        """Store the target (height, width); an int `size` means a square output."""
        super().__init__()
        if isinstance(size, int):
            self.h = self.w = size
        else:
            self.h, self.w = size

    def __call__(self, im):
        """
        Crop the largest centered square from `im`, then resize it to (w, h).

        Args:
            im (numpy.ndarray): The input image as a numpy array of shape HWC.

        Returns:
            (numpy.ndarray): The center-cropped and resized image as a numpy array.
        """
        imh, imw = im.shape[:2]
        side = min(imh, imw)  # edge length of the largest centered square
        top = (imh - side) // 2
        left = (imw - side) // 2
        crop = im[top:top + side, left:left + side]
        return cv2.resize(crop, (self.w, self.h), interpolation=cv2.INTER_LINEAR)
1083
+
1084
+
1085
class ToTensor:
    """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""

    def __init__(self, half=False):
        """
        Initialize ToTensor.

        Args:
            half (bool): if True, output float16 instead of float32.
        """
        super().__init__()
        self.half = half

    def __call__(self, im):
        """
        Convert a numpy image into a normalized PyTorch tensor.

        Args:
            im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.

        Returns:
            (torch.Tensor): CHW RGB tensor in float32 or float16, normalized to [0, 1].
        """
        chw = im.transpose((2, 0, 1))[::-1]  # HWC -> CHW, BGR -> RGB
        tensor = torch.from_numpy(np.ascontiguousarray(chw))
        tensor = tensor.half() if self.half else tensor.float()  # uint8 -> fp16/32
        return tensor / 255.0  # 0-255 -> 0.0-1.0
ultralytics/data/base.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import glob
4
+ import math
5
+ import os
6
+ import random
7
+ from copy import deepcopy
8
+ from multiprocessing.pool import ThreadPool
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import cv2
13
+ import numpy as np
14
+ import psutil
15
+ from torch.utils.data import Dataset
16
+
17
+ from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
18
+
19
+ from .utils import HELP_URL, IMG_FORMATS
20
+
21
+
22
class BaseDataset(Dataset):
    """
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    """

    def __init__(self,
                 img_path,
                 imgsz=640,
                 cache=False,
                 augment=True,
                 hyp=DEFAULT_CFG,
                 prefix='',
                 rect=False,
                 batch_size=16,
                 stride=32,
                 pad=0.5,
                 single_cls=False,
                 classes=None,
                 fraction=1.0):
        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
        self.single_cls = single_cls
        self.prefix = prefix
        self.fraction = fraction
        self.im_files = self.get_img_files(self.img_path)
        self.labels = self.get_labels()  # subclass-provided (see get_labels below)
        self.update_labels(include_class=classes)  # single_cls and include_class
        self.ni = len(self.labels)  # number of images
        self.rect = rect
        self.batch_size = batch_size
        self.stride = stride
        self.pad = pad
        if self.rect:
            # Rectangular training groups images by aspect ratio per batch, so a batch size is mandatory.
            assert self.batch_size is not None
            self.set_rectangle()

        # Buffer thread for mosaic images
        self.buffer = []  # buffer size = batch size
        # Buffer holds recently loaded image indices for mosaic sampling; capped, and disabled when not augmenting.
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

        # Cache images
        # RAM caching is only attempted if the estimated footprint fits in available memory.
        if cache == 'ram' and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
        if cache:
            self.cache_images(cache)

        # Transforms
        self.transforms = self.build_transforms(hyp=hyp)

    def get_img_files(self, img_path):
        """Read image files."""
        try:
            f = []  # image files
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    # Text file listing image paths, one per line; './' entries are resolved against the file's parent.
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise FileNotFoundError(f'{self.prefix}{p} does not exist')
            # Keep only files whose extension is a known image format; normalize separators for the current OS.
            im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
            assert im_files, f'{self.prefix}No images found in {img_path}'
        except Exception as e:
            raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e
        if self.fraction < 1:
            # Use only the leading fraction of the (sorted) file list.
            im_files = im_files[:round(len(im_files) * self.fraction)]
        return im_files

    def update_labels(self, include_class: Optional[list]):
        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
                cls = self.labels[i]['cls']
                bboxes = self.labels[i]['bboxes']
                segments = self.labels[i]['segments']
                keypoints = self.labels[i]['keypoints']
                # j: boolean mask of instances whose class is in include_class.
                j = (cls == include_class_array).any(1)
                self.labels[i]['cls'] = cls[j]
                self.labels[i]['bboxes'] = bboxes[j]
                if segments:
                    # segments is a list (ragged), so filter by enumerating the mask rather than fancy-indexing.
                    self.labels[i]['segments'] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
                    self.labels[i]['keypoints'] = keypoints[j]
            if self.single_cls:
                # Collapse all classes to class 0 for single-class training.
                self.labels[i]['cls'][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                try:
                    im = np.load(fn)
                except Exception as e:
                    # Corrupt .npy cache: delete it and fall back to reading the original image file.
                    LOGGER.warning(f'{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}')
                    Path(fn).unlink(missing_ok=True)
                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
            if im is None:
                raise FileNotFoundError(f'Image Not Found {f}')

            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
                    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
            elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)

            # Add to buffer if training with augmentations
            if self.augment:
                self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
                self.buffer.append(i)
                if len(self.buffer) >= self.max_buffer_length:
                    # Evict the oldest buffered index and release its cached image to bound memory use.
                    j = self.buffer.pop(0)
                    self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None

            return im, (h0, w0), im.shape[:2]

        # Cached-in-RAM fast path.
        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
        fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            # Progress bar is shown only on the main process (LOCAL_RANK <= 0) in distributed training.
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache == 'disk':
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
                pbar.desc = f'{self.prefix}Caching images ({b / gb:.1f}GB {cache})'
            pbar.close()

    def cache_images_to_disk(self, i):
        """Saves an image as an *.npy file for faster loading."""
        f = self.npy_files[i]
        if not f.exists():
            np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

    def check_cache_ram(self, safety_margin=0.5):
        """Check image caching requirements vs available memory."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
        n = min(self.ni, 30)  # extrapolate from 30 random images
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
            b += im.nbytes * ratio ** 2  # scale sampled bytes by the resize ratio squared
        mem_required = b * self.ni / n * (1 + safety_margin)  # GB required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
            # NOTE(review): this branch only runs when cache is False, so the trailing conditional in the
            # message always renders 'not caching images ⚠️' — the '✅' alternative is unreachable here.
            LOGGER.info(f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
                        f'with {int(safety_margin * 100)}% safety margin but only '
                        f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
                        f"{'caching images ✅' if cache else 'not caching images ⚠️'}")
        return cache

    def set_rectangle(self):
        """Sets the shape of bounding boxes for YOLO detections as rectangles."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        s = np.array([x.pop('shape') for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        # Sort images by aspect ratio so each batch contains similarly-shaped images.
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                # All images in batch are wider than tall: shrink height proportionally.
                shapes[i] = [maxi, 1]
            elif mini > 1:
                # All images in batch are taller than wide: shrink width proportionally.
                shapes[i] = [1, 1 / mini]

        # Round batch shapes up to the nearest stride multiple, with optional padding.
        self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
        self.batch = bi  # batch index of image

    def __getitem__(self, index):
        """Returns transformed label information for given index."""
        return self.transforms(self.get_image_and_label(index))

    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop('shape', None)  # shape is for rect, remove it
        label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index)
        label['ratio_pad'] = (label['resized_shape'][0] / label['ori_shape'][0],
                              label['resized_shape'][1] / label['ori_shape'][1])  # for evaluation
        if self.rect:
            label['rect_shape'] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
        """Custom your label format here."""
        return label

    def build_transforms(self, hyp=None):
        """
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        """
        raise NotImplementedError

    def get_labels(self):
        """
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes, # xywh
                segments=segments,  # xy
                keypoints=keypoints, # xy
                normalized=True, # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        """
        raise NotImplementedError