HilmiZr commited on
Commit
bc6a1aa
·
1 Parent(s): 53132c1

added: ultralytics folder

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ultralytics/__init__.py +8 -7
  2. ultralytics/cfg/__init__.py +465 -0
  3. ultralytics/cfg/datasets/Argoverse.yaml +73 -0
  4. ultralytics/cfg/datasets/DOTAv2.yaml +37 -0
  5. ultralytics/cfg/datasets/GlobalWheat2020.yaml +54 -0
  6. ultralytics/cfg/datasets/ImageNet.yaml +2025 -0
  7. ultralytics/cfg/datasets/Objects365.yaml +443 -0
  8. ultralytics/cfg/datasets/SKU-110K.yaml +58 -0
  9. ultralytics/cfg/datasets/VOC.yaml +100 -0
  10. ultralytics/cfg/datasets/VisDrone.yaml +73 -0
  11. ultralytics/cfg/datasets/coco-pose.yaml +38 -0
  12. ultralytics/cfg/datasets/coco.yaml +115 -0
  13. ultralytics/cfg/datasets/coco128-seg.yaml +101 -0
  14. ultralytics/cfg/datasets/coco128.yaml +101 -0
  15. ultralytics/cfg/datasets/coco8-pose.yaml +25 -0
  16. ultralytics/cfg/datasets/coco8-seg.yaml +101 -0
  17. ultralytics/cfg/datasets/coco8.yaml +101 -0
  18. ultralytics/cfg/datasets/open-images-v7.yaml +661 -0
  19. ultralytics/cfg/datasets/tiger-pose.yaml +24 -0
  20. ultralytics/cfg/datasets/xView.yaml +153 -0
  21. ultralytics/cfg/default.yaml +119 -0
  22. ultralytics/cfg/models/README.md +40 -0
  23. ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +50 -0
  24. ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +42 -0
  25. ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +42 -0
  26. ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +54 -0
  27. ultralytics/cfg/models/v3/yolov3-spp.yaml +48 -0
  28. ultralytics/cfg/models/v3/yolov3-tiny.yaml +39 -0
  29. ultralytics/cfg/models/v3/yolov3.yaml +48 -0
  30. ultralytics/cfg/models/v5/yolov5-p6.yaml +61 -0
  31. ultralytics/cfg/models/v5/yolov5.yaml +50 -0
  32. ultralytics/cfg/models/v6/yolov6.yaml +53 -0
  33. ultralytics/cfg/models/v8/yolov8-cls.yaml +29 -0
  34. ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +54 -0
  35. ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +56 -0
  36. ultralytics/cfg/models/v8/yolov8-ghost.yaml +47 -0
  37. ultralytics/cfg/models/v8/yolov8-p2.yaml +54 -0
  38. ultralytics/cfg/models/v8/yolov8-p6.yaml +56 -0
  39. ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +57 -0
  40. ultralytics/cfg/models/v8/yolov8-pose.yaml +47 -0
  41. ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +46 -0
  42. ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +56 -0
  43. ultralytics/cfg/models/v8/yolov8-seg.yaml +46 -0
  44. ultralytics/cfg/models/v8/yolov8.yaml +46 -0
  45. ultralytics/cfg/trackers/botsort.yaml +18 -0
  46. ultralytics/cfg/trackers/bytetrack.yaml +11 -0
  47. ultralytics/data/__init__.py +8 -0
  48. ultralytics/data/annotator.py +50 -0
  49. ultralytics/data/augment.py +1107 -0
  50. ultralytics/data/base.py +304 -0
ultralytics/__init__.py CHANGED
@@ -1,11 +1,12 @@
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
 
3
- __version__ = '8.0.107'
4
 
5
- from ultralytics.hub import start
6
- from ultralytics.vit.rtdetr import RTDETR
7
- from ultralytics.vit.sam import SAM
8
- from ultralytics.yolo.engine.model import YOLO
9
- from ultralytics.yolo.utils.checks import check_yolo as checks
 
10
 
11
- __all__ = '__version__', 'YOLO', 'SAM', 'RTDETR', 'checks', 'start' # allow simpler import
 
1
  # Ultralytics YOLO 🚀, AGPL-3.0 license
2
 
3
+ __version__ = '8.0.225'
4
 
5
+ from ultralytics.models import RTDETR, SAM, YOLO
6
+ from ultralytics.models.fastsam import FastSAM
7
+ from ultralytics.models.nas import NAS
8
+ from ultralytics.utils import SETTINGS as settings
9
+ from ultralytics.utils.checks import check_yolo as checks
10
+ from ultralytics.utils.downloads import download
11
 
12
+ __all__ = '__version__', 'YOLO', 'NAS', 'SAM', 'FastSAM', 'RTDETR', 'checks', 'download', 'settings'
ultralytics/cfg/__init__.py ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import contextlib
4
+ import shutil
5
+ import sys
6
+ from pathlib import Path
7
+ from types import SimpleNamespace
8
+ from typing import Dict, List, Union
9
+
10
+ from ultralytics.utils import (ASSETS, DEFAULT_CFG, DEFAULT_CFG_DICT, DEFAULT_CFG_PATH, LOGGER, RANK, ROOT, RUNS_DIR,
11
+ SETTINGS, SETTINGS_YAML, TESTS_RUNNING, IterableSimpleNamespace, __version__, checks,
12
+ colorstr, deprecation_warn, yaml_load, yaml_print)
13
+
14
# Define valid tasks and modes
MODES = 'train', 'val', 'predict', 'export', 'track', 'benchmark'
TASKS = 'detect', 'segment', 'classify', 'pose'
# Default dataset per task (used when 'data' is not given on the CLI)
TASK2DATA = {'detect': 'coco8.yaml', 'segment': 'coco8-seg.yaml', 'classify': 'imagenet10', 'pose': 'coco8-pose.yaml'}
# Default pretrained weights per task (used when 'model' is not given on the CLI)
TASK2MODEL = {
    'detect': 'yolov8n.pt',
    'segment': 'yolov8n-seg.pt',
    'classify': 'yolov8n-cls.pt',
    'pose': 'yolov8n-pose.pt'}
# Primary metric key per task (used by benchmarking/validation reporting)
TASK2METRIC = {
    'detect': 'metrics/mAP50-95(B)',
    'segment': 'metrics/mAP50-95(M)',
    'classify': 'metrics/accuracy_top1',
    'pose': 'metrics/mAP50-95(P)'}

# Help text printed for 'yolo help', bad arguments, or an empty command line
CLI_HELP_MSG = \
    f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {TASKS}
                MODE (required) is one of {MODES}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    5. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.ultralytics.com
    Community: https://community.ultralytics.com
    GitHub: https://github.com/ultralytics/ultralytics
    """

# Define keys for arg type checks (used by get_cfg() to validate user-supplied values)
CFG_FLOAT_KEYS = 'warmup_epochs', 'box', 'cls', 'dfl', 'degrees', 'shear'
CFG_FRACTION_KEYS = ('dropout', 'iou', 'lr0', 'lrf', 'momentum', 'weight_decay', 'warmup_momentum', 'warmup_bias_lr',
                     'label_smoothing', 'hsv_h', 'hsv_s', 'hsv_v', 'translate', 'scale', 'perspective', 'flipud',
                     'fliplr', 'mosaic', 'mixup', 'copy_paste', 'conf', 'iou', 'fraction')  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = ('epochs', 'patience', 'batch', 'workers', 'seed', 'close_mosaic', 'mask_ratio', 'max_det', 'vid_stride',
                'line_width', 'workspace', 'nbs', 'save_period')
CFG_BOOL_KEYS = ('save', 'exist_ok', 'verbose', 'deterministic', 'single_cls', 'rect', 'cos_lr', 'overlap_mask', 'val',
                 'save_json', 'save_hybrid', 'half', 'dnn', 'plots', 'show', 'save_txt', 'save_conf', 'save_crop',
                 'save_frames', 'show_labels', 'show_conf', 'visualize', 'augment', 'agnostic_nms', 'retina_masks',
                 'show_boxes', 'keras', 'optimize', 'int8', 'dynamic', 'simplify', 'nms', 'profile')
76
+
77
+
78
def cfg2dict(cfg):
    """
    Convert a configuration object to a plain dictionary.

    Accepts a YAML file path (str | Path), a SimpleNamespace, or an existing dict;
    dicts pass through unchanged.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration object to be converted to a dictionary.

    Returns:
        cfg (dict): Configuration object in dictionary format.
    """
    if isinstance(cfg, SimpleNamespace):
        return vars(cfg)  # namespace attributes -> dict
    if isinstance(cfg, (str, Path)):
        return yaml_load(cfg)  # read the YAML file into a dict
    return cfg
93
+
94
+
95
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load and merge configuration data from a file or dictionary.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Configuration data.
        overrides (str | Dict | optional): Overrides in the form of a file name or a dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace.

    Raises:
        TypeError: If a known key has a value of the wrong type (float/int/bool keys).
        ValueError: If a fraction key is outside the range [0.0, 1.0].
    """
    cfg = cfg2dict(cfg)

    # Merge overrides (override values win over base cfg values)
    if overrides:
        overrides = cfg2dict(overrides)
        if 'save_dir' not in cfg:
            overrides.pop('save_dir', None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # merge cfg and overrides dicts (prefer overrides)

    # Special handling for numeric project/name: coerce to str so they behave as directory names
    for k in 'project', 'name':
        if k in cfg and isinstance(cfg[k], (int, float)):
            cfg[k] = str(cfg[k])
    if cfg.get('name') == 'model':  # assign model to 'name' arg
        cfg['name'] = cfg.get('model', '').split('.')[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and Value checks against the CFG_*_KEYS groups defined at module level
    for k, v in cfg.items():
        if v is not None:  # None values may be from optional args
            if k in CFG_FLOAT_KEYS and not isinstance(v, (int, float)):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
            elif k in CFG_FRACTION_KEYS:
                if not isinstance(v, (int, float)):
                    raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                    f"Valid '{k}' types are int (i.e. '{k}=0') or float (i.e. '{k}=0.5')")
                if not (0.0 <= v <= 1.0):
                    raise ValueError(f"'{k}={v}' is an invalid value. "
                                     f"Valid '{k}' values are between 0.0 and 1.0.")
            elif k in CFG_INT_KEYS and not isinstance(v, int):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"'{k}' must be an int (i.e. '{k}=8')")
            elif k in CFG_BOOL_KEYS and not isinstance(v, bool):
                raise TypeError(f"'{k}={v}' is of invalid type {type(v).__name__}. "
                                f"'{k}' must be a bool (i.e. '{k}=True' or '{k}=False')")

    # Return instance (iterable namespace so callers can use both attribute and dict-style access)
    return IterableSimpleNamespace(**cfg)
146
+
147
+
148
def get_save_dir(args, name=None):
    """Return save_dir as created from train/val/predict arguments."""

    explicit = getattr(args, 'save_dir', None)
    if explicit:
        # An explicit save_dir override always wins
        return Path(explicit)

    from ultralytics.utils.files import increment_path

    # Compose project/name and auto-increment (runs/task/name, name2, name3, ...);
    # non-zero ranks reuse the rank-0 directory rather than incrementing again
    project = args.project or (ROOT.parent / 'tests/tmp/runs' if TESTS_RUNNING else RUNS_DIR) / args.task
    run_name = name or args.name or f'{args.mode}'
    allow_existing = args.exist_ok if RANK in (-1, 0) else True
    return Path(increment_path(Path(project) / run_name, exist_ok=allow_existing))
161
+
162
+
163
+ def _handle_deprecation(custom):
164
+ """Hardcoded function to handle deprecated config keys."""
165
+
166
+ for key in custom.copy().keys():
167
+ if key == 'boxes':
168
+ deprecation_warn(key, 'show_boxes')
169
+ custom['show_boxes'] = custom.pop('boxes')
170
+ if key == 'hide_labels':
171
+ deprecation_warn(key, 'show_labels')
172
+ custom['show_labels'] = custom.pop('hide_labels') == 'False'
173
+ if key == 'hide_conf':
174
+ deprecation_warn(key, 'show_conf')
175
+ custom['show_conf'] = custom.pop('hide_conf') == 'False'
176
+ if key == 'line_thickness':
177
+ deprecation_warn(key, 'line_width')
178
+ custom['line_width'] = custom.pop('line_thickness')
179
+
180
+ return custom
181
+
182
+
183
def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    Verify that every key in a custom configuration dict exists in the base configuration dict.

    Any unknown key aborts the program with a SyntaxError that lists close matches from the
    base dict to help the user correct a typo.

    Args:
        custom (dict): a dictionary of custom configuration options
        base (dict): a dictionary of base configuration options
        e (Error, optional): An optional error that is passed by the calling function.
    """
    custom = _handle_deprecation(custom)
    base_keys = set(base.keys())
    unknown = [k for k in set(custom.keys()) if k not in base_keys]
    if not unknown:
        return

    from difflib import get_close_matches

    message = ''
    for bad_key in unknown:
        candidates = get_close_matches(bad_key, base_keys)  # key list
        candidates = [f'{c}={base[c]}' if base.get(c) is not None else c for c in candidates]
        hint = f'Similar arguments are i.e. {candidates}.' if candidates else ''
        message += f"'{colorstr('red', 'bold', bad_key)}' is not a valid YOLO argument. {hint}\n"
    raise SyntaxError(message + CLI_HELP_MSG) from e
206
+
207
+
208
def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merge arguments around isolated '=' signs in a list of strings.

    Handles the three split forms ['arg', '=', 'val'], ['arg=', 'val'] and ['arg', '=val'],
    producing a single 'arg=val' token for each.

    Fix: the original implementation deleted elements from `args` while iterating it with
    enumerate — a modify-while-iterating anti-pattern that also mutated the caller's list.
    This version walks an explicit index over the unmodified input, so the caller's list
    is left untouched while producing identical output.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        List[str]: A list of strings where the arguments around isolated '=' are merged.
    """
    new_args = []
    i, n = 0, len(args)
    while i < n:
        arg = args[i]
        if arg == '=' and new_args and i + 1 < n:  # merge ['arg', '=', 'val']
            new_args[-1] += f'={args[i + 1]}'
            i += 2
        elif arg.endswith('=') and i + 1 < n and '=' not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f'{arg}{args[i + 1]}')
            i += 2
        elif arg.startswith('=') and new_args:  # merge ['arg', '=val']
            new_args[-1] += arg
            i += 1
        else:
            new_args.append(arg)
            i += 1
    return new_args
232
+
233
+
234
def handle_yolo_hub(args: List[str]) -> None:
    """
    Handle Ultralytics HUB command-line interface (CLI) commands.

    Processes HUB authentication commands: 'login' (optionally followed by an API key)
    and 'logout'. Any other first argument is silently ignored.

    Args:
        args (List[str]): A list of command line arguments

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from ultralytics import hub

    command = args[0]
    if command == 'login':
        # Use the API key if one was supplied, otherwise pass an empty string
        hub.login(args[1] if len(args) > 1 else '')
    elif command == 'logout':
        hub.logout()
258
+
259
+
260
def handle_yolo_settings(args: List[str]) -> None:
    """
    Handle YOLO settings command-line interface (CLI) commands.

    This function processes YOLO settings CLI commands such as reset.
    It should be called when executing a script with arguments related to YOLO settings management.

    Args:
        args (List[str]): A list of command line arguments for YOLO settings management.
            Either ['reset'] or any number of 'key=value' pairs to store.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = 'https://docs.ultralytics.com/quickstart/#ultralytics-settings'  # help URL
    try:
        if any(args):
            if args[0] == 'reset':
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info('Settings reset successfully')  # inform the user that settings have been reset
            else:  # save a new setting
                new = dict(parse_key_value_pair(a) for a in args)
                check_dict_alignment(SETTINGS, new)  # reject unknown setting names before saving
                SETTINGS.update(new)

        LOGGER.info(f'💡 Learn about settings at {url}')
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        # Best-effort: settings-management failures are logged as warnings, never raised
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
291
+
292
+
293
def parse_key_value_pair(pair):
    """
    Parse one 'key=value' string and return the (key, value) tuple.

    The value is converted to its underlying Python type via smart_value().

    Args:
        pair (str): A string of the form 'key=value'; the value may itself contain '=' signs.

    Returns:
        (tuple): (key, converted value).

    Raises:
        AssertionError: If the value part is empty, e.g. 'key='.
    """
    k, v = pair.split('=', 1)  # split on first '=' sign only, so values may contain '='
    k, v = k.strip(), v.strip()  # remove spaces
    if not v:
        # Raise explicitly instead of `assert`, which is stripped under `python -O`.
        # AssertionError is kept so entrypoint()'s except clause still catches it.
        raise AssertionError(f"missing '{k}' value")
    return k, smart_value(v)
299
+
300
+
301
def smart_value(v):
    """Convert a string to an underlying type such as int, float, bool, None, etc."""
    lowered = v.lower()
    if lowered == 'none':
        return None
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    # SECURITY NOTE: eval() executes arbitrary Python expressions; this is only acceptable
    # because input comes from the user's own local CLI, never from untrusted sources.
    with contextlib.suppress(Exception):
        return eval(v)  # e.g. '3' -> 3, '0.5' -> 0.5; non-literals fall through
    return v  # unconvertible strings are returned unchanged
314
+
315
+
316
def entrypoint(debug=''):
    """
    This function is the ultralytics package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg

    Args:
        debug (str): Optional full command string (e.g. 'yolo predict model=yolov8n.pt') used for
            testing instead of reading sys.argv.
    """
    args = (debug.split(' ') if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    # Special commands that run a handler and exit instead of invoking a model
    special = {
        'help': lambda: LOGGER.info(CLI_HELP_MSG),
        'checks': checks.collect_system_info,
        'version': lambda: LOGGER.info(__version__),
        'settings': lambda: handle_yolo_settings(args[1:]),
        'cfg': lambda: yaml_print(DEFAULT_CFG_PATH),
        'hub': lambda: handle_yolo_hub(args[1:]),
        'login': lambda: handle_yolo_hub(args),
        'copy-cfg': copy_default_cfg}
    # Union of every name the CLI accepts, used for typo suggestions in check_dict_alignment()
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # first-letter shorthand, i.e. 'h' for 'help'
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith('s')})  # singular, i.e. 'check'
    special = {**special, **{f'-{k}': v for k, v in special.items()}, **{f'--{k}': v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        # Tolerate common CLI mistakes: leading dashes and trailing commas
        if a.startswith('--'):
            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(','):
            LOGGER.warning(f"WARNING ⚠️ '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if '=' in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == 'cfg' and v is not None:  # custom.yaml passed
                    LOGGER.info(f'Overriding {DEFAULT_CFG_PATH} with {v}')
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != 'cfg'}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                # Malformed pair: report it with close-match suggestions and abort
                check_dict_alignment(full_args_dict, {a: ''}, e)

        elif a in TASKS:
            overrides['task'] = a
        elif a in MODES:
            overrides['mode'] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                              f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}")
        else:
            check_dict_alignment(full_args_dict, {a: ''})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode: fall back to the configured default (or 'predict') when omitted
    mode = overrides.get('mode')
    if mode is None:
        mode = DEFAULT_CFG.mode or 'predict'
        LOGGER.warning(f"WARNING ⚠️ 'mode' is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task: optional; when given without a model, selects the default model for that task
    task = overrides.pop('task', None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if 'model' not in overrides:
            overrides['model'] = TASK2MODEL[task]

    # Model: instantiate the right model class based on the checkpoint filename stem
    model = overrides.pop('model', DEFAULT_CFG.model)
    if model is None:
        model = 'yolov8n.pt'
        LOGGER.warning(f"WARNING ⚠️ 'model' is missing. Using default 'model={model}'.")
    overrides['model'] = model
    stem = Path(model).stem.lower()
    if 'rtdetr' in stem:  # guess architecture
        from ultralytics import RTDETR
        model = RTDETR(model)  # no task argument
    elif 'fastsam' in stem:
        # NOTE: 'fastsam' must be checked before 'sam' since 'sam' is a substring of 'fastsam'
        from ultralytics import FastSAM
        model = FastSAM(model)
    elif 'sam' in stem:
        from ultralytics import SAM
        model = SAM(model)
    else:
        from ultralytics import YOLO
        model = YOLO(model, task=task)
    if isinstance(overrides.get('pretrained'), str):
        model.load(overrides['pretrained'])

    # Task Update: the model's own task always wins over a conflicting CLI task
    if task != model.task:
        if task:
            LOGGER.warning(f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                           f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model.")
        task = model.task

    # Mode: fill in mode-specific required args ('source', 'data', 'format') with defaults
    if mode in ('predict', 'track') and 'source' not in overrides:
        overrides['source'] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' is missing. Using default 'source={overrides['source']}'.")
    elif mode in ('train', 'val'):
        if 'data' not in overrides and 'resume' not in overrides:
            overrides['data'] = TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' is missing. Using default 'data={overrides['data']}'.")
    elif mode == 'export':
        if 'format' not in overrides:
            overrides['format'] = DEFAULT_CFG.format or 'torchscript'
            LOGGER.warning(f"WARNING ⚠️ 'format' is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f'💡 Learn more at https://docs.ultralytics.com/modes/{mode}')
452
+
453
+
454
+ # Special modes --------------------------------------------------------------------------------------------------------
455
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    destination = Path.cwd() / DEFAULT_CFG_PATH.name.replace('.yaml', '_copy.yaml')
    shutil.copy2(DEFAULT_CFG_PATH, destination)  # copy2 preserves file metadata
    LOGGER.info(f'{DEFAULT_CFG_PATH} copied to {destination}\n'
                f"Example YOLO command with this new custom cfg:\n yolo cfg='{destination}' imgsz=320 batch=8")
461
+
462
+
463
if __name__ == '__main__':
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    # With an empty debug string, real CLI arguments are read from sys.argv instead.
    entrypoint(debug='')
ultralytics/cfg/datasets/Argoverse.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~mengtial/proj/streaming/ by Argo AI
3
+ # Example usage: yolo train data=Argoverse.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── Argoverse ← downloads here (31.5 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/Argoverse # dataset root dir
12
+ train: Argoverse-1.1/images/train/ # train images (relative to 'path') 39384 images
13
+ val: Argoverse-1.1/images/val/ # val images (relative to 'path') 15062 images
14
+ test: Argoverse-1.1/images/test/ # test images (optional) https://eval.ai/web/challenges/challenge-page/800/overview
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: bus
23
+ 5: truck
24
+ 6: traffic_light
25
+ 7: stop_sign
26
+
27
+
28
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
29
+ download: |
30
+ import json
31
+ from tqdm import tqdm
32
+ from ultralytics.utils.downloads import download
33
+ from pathlib import Path
34
+
35
+ def argoverse2yolo(set):
36
+ labels = {}
37
+ a = json.load(open(set, "rb"))
38
+ for annot in tqdm(a['annotations'], desc=f"Converting {set} to YOLOv5 format..."):
39
+ img_id = annot['image_id']
40
+ img_name = a['images'][img_id]['name']
41
+ img_label_name = f'{img_name[:-3]}txt'
42
+
43
+ cls = annot['category_id'] # instance class id
44
+ x_center, y_center, width, height = annot['bbox']
45
+ x_center = (x_center + width / 2) / 1920.0 # offset and scale
46
+ y_center = (y_center + height / 2) / 1200.0 # offset and scale
47
+ width /= 1920.0 # scale
48
+ height /= 1200.0 # scale
49
+
50
+ img_dir = set.parents[2] / 'Argoverse-1.1' / 'labels' / a['seq_dirs'][a['images'][annot['image_id']]['sid']]
51
+ if not img_dir.exists():
52
+ img_dir.mkdir(parents=True, exist_ok=True)
53
+
54
+ k = str(img_dir / img_label_name)
55
+ if k not in labels:
56
+ labels[k] = []
57
+ labels[k].append(f"{cls} {x_center} {y_center} {width} {height}\n")
58
+
59
+ for k in labels:
60
+ with open(k, "w") as f:
61
+ f.writelines(labels[k])
62
+
63
+
64
+ # Download 'https://argoverse-hd.s3.us-east-2.amazonaws.com/Argoverse-HD-Full.zip' (deprecated S3 link)
65
+ dir = Path(yaml['path']) # dataset root dir
66
+ urls = ['https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link']
67
+ download(urls, dir=dir)
68
+
69
+ # Convert
70
+ annotations_dir = 'Argoverse-HD/annotations/'
71
+ (dir / 'Argoverse-1.1' / 'tracking').rename(dir / 'Argoverse-1.1' / 'images') # rename 'tracking' to 'images'
72
+ for d in "train.json", "val.json":
73
+ argoverse2yolo(dir / annotations_dir / d) # convert Argoverse annotations to YOLO labels
ultralytics/cfg/datasets/DOTAv2.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DOTA 2.0 dataset https://captain-whu.github.io/DOTA/index.html for object detection in aerial images by Wuhan University
3
+ # Example usage: yolo train model=yolov8n-obb.pt data=DOTAv2.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── dota2 ← downloads here (2GB)
8
+
9
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
10
+ path: ../datasets/DOTAv2 # dataset root dir
11
+ train: images/train # train images (relative to 'path') 1411 images
12
+ val: images/val # val images (relative to 'path') 458 images
13
+ test: images/test # test images (optional) 937 images
14
+
15
+ # Classes for DOTA 2.0
16
+ names:
17
+ 0: plane
18
+ 1: ship
19
+ 2: storage tank
20
+ 3: baseball diamond
21
+ 4: tennis court
22
+ 5: basketball court
23
+ 6: ground track field
24
+ 7: harbor
25
+ 8: bridge
26
+ 9: large vehicle
27
+ 10: small vehicle
28
+ 11: helicopter
29
+ 12: roundabout
30
+ 13: soccer ball field
31
+ 14: swimming pool
32
+ 15: container crane
33
+ 16: airport
34
+ 17: helipad
35
+
36
+ # Download script/URL (optional)
37
+ download: https://github.com/ultralytics/yolov5/releases/download/v1.0/DOTAv2.zip
ultralytics/cfg/datasets/GlobalWheat2020.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Saskatchewan
3
+ # Example usage: yolo train data=GlobalWheat2020.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── GlobalWheat2020 ← downloads here (7.0 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/GlobalWheat2020 # dataset root dir
12
+ train: # train images (relative to 'path') 3422 images
13
+ - images/arvalis_1
14
+ - images/arvalis_2
15
+ - images/arvalis_3
16
+ - images/ethz_1
17
+ - images/rres_1
18
+ - images/inrae_1
19
+ - images/usask_1
20
+ val: # val images (relative to 'path') 748 images (WARNING: train set contains ethz_1)
21
+ - images/ethz_1
22
+ test: # test images (optional) 1276 images
23
+ - images/utokyo_1
24
+ - images/utokyo_2
25
+ - images/nau_1
26
+ - images/uq_1
27
+
28
+ # Classes
29
+ names:
30
+ 0: wheat_head
31
+
32
+
33
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
34
+ download: |
35
+ from ultralytics.utils.downloads import download
36
+ from pathlib import Path
37
+
38
+ # Download
39
+ dir = Path(yaml['path']) # dataset root dir
40
+ urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
41
+ 'https://github.com/ultralytics/yolov5/releases/download/v1.0/GlobalWheat2020_labels.zip']
42
+ download(urls, dir=dir)
43
+
44
+ # Make Directories
45
+ for p in 'annotations', 'images', 'labels':
46
+ (dir / p).mkdir(parents=True, exist_ok=True)
47
+
48
+ # Move
49
+ for p in 'arvalis_1', 'arvalis_2', 'arvalis_3', 'ethz_1', 'rres_1', 'inrae_1', 'usask_1', \
50
+ 'utokyo_1', 'utokyo_2', 'nau_1', 'uq_1':
51
+ (dir / 'global-wheat-codalab-official' / p).rename(dir / 'images' / p) # move to /images
52
+ f = (dir / 'global-wheat-codalab-official' / p).with_suffix('.json') # json file
53
+ if f.exists():
54
+ f.rename((dir / 'annotations' / p).with_suffix('.json')) # move to /annotations
ultralytics/cfg/datasets/ImageNet.yaml ADDED
@@ -0,0 +1,2025 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # ImageNet-1k dataset https://www.image-net.org/index.php by Stanford University
3
+ # Simplified class names from https://github.com/anishathalye/imagenet-simple-labels
4
+ # Example usage: yolo train task=classify data=imagenet
5
+ # parent
6
+ # ├── ultralytics
7
+ # └── datasets
8
+ # └── imagenet ← downloads here (144 GB)
9
+
10
+
11
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12
+ path: ../datasets/imagenet # dataset root dir
13
+ train: train # train images (relative to 'path') 1281167 images
14
+ val: val # val images (relative to 'path') 50000 images
15
+ test: # test images (optional)
16
+
17
+ # Classes
18
+ names:
19
+ 0: tench
20
+ 1: goldfish
21
+ 2: great white shark
22
+ 3: tiger shark
23
+ 4: hammerhead shark
24
+ 5: electric ray
25
+ 6: stingray
26
+ 7: cock
27
+ 8: hen
28
+ 9: ostrich
29
+ 10: brambling
30
+ 11: goldfinch
31
+ 12: house finch
32
+ 13: junco
33
+ 14: indigo bunting
34
+ 15: American robin
35
+ 16: bulbul
36
+ 17: jay
37
+ 18: magpie
38
+ 19: chickadee
39
+ 20: American dipper
40
+ 21: kite
41
+ 22: bald eagle
42
+ 23: vulture
43
+ 24: great grey owl
44
+ 25: fire salamander
45
+ 26: smooth newt
46
+ 27: newt
47
+ 28: spotted salamander
48
+ 29: axolotl
49
+ 30: American bullfrog
50
+ 31: tree frog
51
+ 32: tailed frog
52
+ 33: loggerhead sea turtle
53
+ 34: leatherback sea turtle
54
+ 35: mud turtle
55
+ 36: terrapin
56
+ 37: box turtle
57
+ 38: banded gecko
58
+ 39: green iguana
59
+ 40: Carolina anole
60
+ 41: desert grassland whiptail lizard
61
+ 42: agama
62
+ 43: frilled-necked lizard
63
+ 44: alligator lizard
64
+ 45: Gila monster
65
+ 46: European green lizard
66
+ 47: chameleon
67
+ 48: Komodo dragon
68
+ 49: Nile crocodile
69
+ 50: American alligator
70
+ 51: triceratops
71
+ 52: worm snake
72
+ 53: ring-necked snake
73
+ 54: eastern hog-nosed snake
74
+ 55: smooth green snake
75
+ 56: kingsnake
76
+ 57: garter snake
77
+ 58: water snake
78
+ 59: vine snake
79
+ 60: night snake
80
+ 61: boa constrictor
81
+ 62: African rock python
82
+ 63: Indian cobra
83
+ 64: green mamba
84
+ 65: sea snake
85
+ 66: Saharan horned viper
86
+ 67: eastern diamondback rattlesnake
87
+ 68: sidewinder
88
+ 69: trilobite
89
+ 70: harvestman
90
+ 71: scorpion
91
+ 72: yellow garden spider
92
+ 73: barn spider
93
+ 74: European garden spider
94
+ 75: southern black widow
95
+ 76: tarantula
96
+ 77: wolf spider
97
+ 78: tick
98
+ 79: centipede
99
+ 80: black grouse
100
+ 81: ptarmigan
101
+ 82: ruffed grouse
102
+ 83: prairie grouse
103
+ 84: peacock
104
+ 85: quail
105
+ 86: partridge
106
+ 87: grey parrot
107
+ 88: macaw
108
+ 89: sulphur-crested cockatoo
109
+ 90: lorikeet
110
+ 91: coucal
111
+ 92: bee eater
112
+ 93: hornbill
113
+ 94: hummingbird
114
+ 95: jacamar
115
+ 96: toucan
116
+ 97: duck
117
+ 98: red-breasted merganser
118
+ 99: goose
119
+ 100: black swan
120
+ 101: tusker
121
+ 102: echidna
122
+ 103: platypus
123
+ 104: wallaby
124
+ 105: koala
125
+ 106: wombat
126
+ 107: jellyfish
127
+ 108: sea anemone
128
+ 109: brain coral
129
+ 110: flatworm
130
+ 111: nematode
131
+ 112: conch
132
+ 113: snail
133
+ 114: slug
134
+ 115: sea slug
135
+ 116: chiton
136
+ 117: chambered nautilus
137
+ 118: Dungeness crab
138
+ 119: rock crab
139
+ 120: fiddler crab
140
+ 121: red king crab
141
+ 122: American lobster
142
+ 123: spiny lobster
143
+ 124: crayfish
144
+ 125: hermit crab
145
+ 126: isopod
146
+ 127: white stork
147
+ 128: black stork
148
+ 129: spoonbill
149
+ 130: flamingo
150
+ 131: little blue heron
151
+ 132: great egret
152
+ 133: bittern
153
+ 134: crane (bird)
154
+ 135: limpkin
155
+ 136: common gallinule
156
+ 137: American coot
157
+ 138: bustard
158
+ 139: ruddy turnstone
159
+ 140: dunlin
160
+ 141: common redshank
161
+ 142: dowitcher
162
+ 143: oystercatcher
163
+ 144: pelican
164
+ 145: king penguin
165
+ 146: albatross
166
+ 147: grey whale
167
+ 148: killer whale
168
+ 149: dugong
169
+ 150: sea lion
170
+ 151: Chihuahua
171
+ 152: Japanese Chin
172
+ 153: Maltese
173
+ 154: Pekingese
174
+ 155: Shih Tzu
175
+ 156: King Charles Spaniel
176
+ 157: Papillon
177
+ 158: toy terrier
178
+ 159: Rhodesian Ridgeback
179
+ 160: Afghan Hound
180
+ 161: Basset Hound
181
+ 162: Beagle
182
+ 163: Bloodhound
183
+ 164: Bluetick Coonhound
184
+ 165: Black and Tan Coonhound
185
+ 166: Treeing Walker Coonhound
186
+ 167: English foxhound
187
+ 168: Redbone Coonhound
188
+ 169: borzoi
189
+ 170: Irish Wolfhound
190
+ 171: Italian Greyhound
191
+ 172: Whippet
192
+ 173: Ibizan Hound
193
+ 174: Norwegian Elkhound
194
+ 175: Otterhound
195
+ 176: Saluki
196
+ 177: Scottish Deerhound
197
+ 178: Weimaraner
198
+ 179: Staffordshire Bull Terrier
199
+ 180: American Staffordshire Terrier
200
+ 181: Bedlington Terrier
201
+ 182: Border Terrier
202
+ 183: Kerry Blue Terrier
203
+ 184: Irish Terrier
204
+ 185: Norfolk Terrier
205
+ 186: Norwich Terrier
206
+ 187: Yorkshire Terrier
207
+ 188: Wire Fox Terrier
208
+ 189: Lakeland Terrier
209
+ 190: Sealyham Terrier
210
+ 191: Airedale Terrier
211
+ 192: Cairn Terrier
212
+ 193: Australian Terrier
213
+ 194: Dandie Dinmont Terrier
214
+ 195: Boston Terrier
215
+ 196: Miniature Schnauzer
216
+ 197: Giant Schnauzer
217
+ 198: Standard Schnauzer
218
+ 199: Scottish Terrier
219
+ 200: Tibetan Terrier
220
+ 201: Australian Silky Terrier
221
+ 202: Soft-coated Wheaten Terrier
222
+ 203: West Highland White Terrier
223
+ 204: Lhasa Apso
224
+ 205: Flat-Coated Retriever
225
+ 206: Curly-coated Retriever
226
+ 207: Golden Retriever
227
+ 208: Labrador Retriever
228
+ 209: Chesapeake Bay Retriever
229
+ 210: German Shorthaired Pointer
230
+ 211: Vizsla
231
+ 212: English Setter
232
+ 213: Irish Setter
233
+ 214: Gordon Setter
234
+ 215: Brittany
235
+ 216: Clumber Spaniel
236
+ 217: English Springer Spaniel
237
+ 218: Welsh Springer Spaniel
238
+ 219: Cocker Spaniels
239
+ 220: Sussex Spaniel
240
+ 221: Irish Water Spaniel
241
+ 222: Kuvasz
242
+ 223: Schipperke
243
+ 224: Groenendael
244
+ 225: Malinois
245
+ 226: Briard
246
+ 227: Australian Kelpie
247
+ 228: Komondor
248
+ 229: Old English Sheepdog
249
+ 230: Shetland Sheepdog
250
+ 231: collie
251
+ 232: Border Collie
252
+ 233: Bouvier des Flandres
253
+ 234: Rottweiler
254
+ 235: German Shepherd Dog
255
+ 236: Dobermann
256
+ 237: Miniature Pinscher
257
+ 238: Greater Swiss Mountain Dog
258
+ 239: Bernese Mountain Dog
259
+ 240: Appenzeller Sennenhund
260
+ 241: Entlebucher Sennenhund
261
+ 242: Boxer
262
+ 243: Bullmastiff
263
+ 244: Tibetan Mastiff
264
+ 245: French Bulldog
265
+ 246: Great Dane
266
+ 247: St. Bernard
267
+ 248: husky
268
+ 249: Alaskan Malamute
269
+ 250: Siberian Husky
270
+ 251: Dalmatian
271
+ 252: Affenpinscher
272
+ 253: Basenji
273
+ 254: pug
274
+ 255: Leonberger
275
+ 256: Newfoundland
276
+ 257: Pyrenean Mountain Dog
277
+ 258: Samoyed
278
+ 259: Pomeranian
279
+ 260: Chow Chow
280
+ 261: Keeshond
281
+ 262: Griffon Bruxellois
282
+ 263: Pembroke Welsh Corgi
283
+ 264: Cardigan Welsh Corgi
284
+ 265: Toy Poodle
285
+ 266: Miniature Poodle
286
+ 267: Standard Poodle
287
+ 268: Mexican hairless dog
288
+ 269: grey wolf
289
+ 270: Alaskan tundra wolf
290
+ 271: red wolf
291
+ 272: coyote
292
+ 273: dingo
293
+ 274: dhole
294
+ 275: African wild dog
295
+ 276: hyena
296
+ 277: red fox
297
+ 278: kit fox
298
+ 279: Arctic fox
299
+ 280: grey fox
300
+ 281: tabby cat
301
+ 282: tiger cat
302
+ 283: Persian cat
303
+ 284: Siamese cat
304
+ 285: Egyptian Mau
305
+ 286: cougar
306
+ 287: lynx
307
+ 288: leopard
308
+ 289: snow leopard
309
+ 290: jaguar
310
+ 291: lion
311
+ 292: tiger
312
+ 293: cheetah
313
+ 294: brown bear
314
+ 295: American black bear
315
+ 296: polar bear
316
+ 297: sloth bear
317
+ 298: mongoose
318
+ 299: meerkat
319
+ 300: tiger beetle
320
+ 301: ladybug
321
+ 302: ground beetle
322
+ 303: longhorn beetle
323
+ 304: leaf beetle
324
+ 305: dung beetle
325
+ 306: rhinoceros beetle
326
+ 307: weevil
327
+ 308: fly
328
+ 309: bee
329
+ 310: ant
330
+ 311: grasshopper
331
+ 312: cricket
332
+ 313: stick insect
333
+ 314: cockroach
334
+ 315: mantis
335
+ 316: cicada
336
+ 317: leafhopper
337
+ 318: lacewing
338
+ 319: dragonfly
339
+ 320: damselfly
340
+ 321: red admiral
341
+ 322: ringlet
342
+ 323: monarch butterfly
343
+ 324: small white
344
+ 325: sulphur butterfly
345
+ 326: gossamer-winged butterfly
346
+ 327: starfish
347
+ 328: sea urchin
348
+ 329: sea cucumber
349
+ 330: cottontail rabbit
350
+ 331: hare
351
+ 332: Angora rabbit
352
+ 333: hamster
353
+ 334: porcupine
354
+ 335: fox squirrel
355
+ 336: marmot
356
+ 337: beaver
357
+ 338: guinea pig
358
+ 339: common sorrel
359
+ 340: zebra
360
+ 341: pig
361
+ 342: wild boar
362
+ 343: warthog
363
+ 344: hippopotamus
364
+ 345: ox
365
+ 346: water buffalo
366
+ 347: bison
367
+ 348: ram
368
+ 349: bighorn sheep
369
+ 350: Alpine ibex
370
+ 351: hartebeest
371
+ 352: impala
372
+ 353: gazelle
373
+ 354: dromedary
374
+ 355: llama
375
+ 356: weasel
376
+ 357: mink
377
+ 358: European polecat
378
+ 359: black-footed ferret
379
+ 360: otter
380
+ 361: skunk
381
+ 362: badger
382
+ 363: armadillo
383
+ 364: three-toed sloth
384
+ 365: orangutan
385
+ 366: gorilla
386
+ 367: chimpanzee
387
+ 368: gibbon
388
+ 369: siamang
389
+ 370: guenon
390
+ 371: patas monkey
391
+ 372: baboon
392
+ 373: macaque
393
+ 374: langur
394
+ 375: black-and-white colobus
395
+ 376: proboscis monkey
396
+ 377: marmoset
397
+ 378: white-headed capuchin
398
+ 379: howler monkey
399
+ 380: titi
400
+ 381: Geoffroy's spider monkey
401
+ 382: common squirrel monkey
402
+ 383: ring-tailed lemur
403
+ 384: indri
404
+ 385: Asian elephant
405
+ 386: African bush elephant
406
+ 387: red panda
407
+ 388: giant panda
408
+ 389: snoek
409
+ 390: eel
410
+ 391: coho salmon
411
+ 392: rock beauty
412
+ 393: clownfish
413
+ 394: sturgeon
414
+ 395: garfish
415
+ 396: lionfish
416
+ 397: pufferfish
417
+ 398: abacus
418
+ 399: abaya
419
+ 400: academic gown
420
+ 401: accordion
421
+ 402: acoustic guitar
422
+ 403: aircraft carrier
423
+ 404: airliner
424
+ 405: airship
425
+ 406: altar
426
+ 407: ambulance
427
+ 408: amphibious vehicle
428
+ 409: analog clock
429
+ 410: apiary
430
+ 411: apron
431
+ 412: waste container
432
+ 413: assault rifle
433
+ 414: backpack
434
+ 415: bakery
435
+ 416: balance beam
436
+ 417: balloon
437
+ 418: ballpoint pen
438
+ 419: Band-Aid
439
+ 420: banjo
440
+ 421: baluster
441
+ 422: barbell
442
+ 423: barber chair
443
+ 424: barbershop
444
+ 425: barn
445
+ 426: barometer
446
+ 427: barrel
447
+ 428: wheelbarrow
448
+ 429: baseball
449
+ 430: basketball
450
+ 431: bassinet
451
+ 432: bassoon
452
+ 433: swimming cap
453
+ 434: bath towel
454
+ 435: bathtub
455
+ 436: station wagon
456
+ 437: lighthouse
457
+ 438: beaker
458
+ 439: military cap
459
+ 440: beer bottle
460
+ 441: beer glass
461
+ 442: bell-cot
462
+ 443: bib
463
+ 444: tandem bicycle
464
+ 445: bikini
465
+ 446: ring binder
466
+ 447: binoculars
467
+ 448: birdhouse
468
+ 449: boathouse
469
+ 450: bobsleigh
470
+ 451: bolo tie
471
+ 452: poke bonnet
472
+ 453: bookcase
473
+ 454: bookstore
474
+ 455: bottle cap
475
+ 456: bow
476
+ 457: bow tie
477
+ 458: brass
478
+ 459: bra
479
+ 460: breakwater
480
+ 461: breastplate
481
+ 462: broom
482
+ 463: bucket
483
+ 464: buckle
484
+ 465: bulletproof vest
485
+ 466: high-speed train
486
+ 467: butcher shop
487
+ 468: taxicab
488
+ 469: cauldron
489
+ 470: candle
490
+ 471: cannon
491
+ 472: canoe
492
+ 473: can opener
493
+ 474: cardigan
494
+ 475: car mirror
495
+ 476: carousel
496
+ 477: tool kit
497
+ 478: carton
498
+ 479: car wheel
499
+ 480: automated teller machine
500
+ 481: cassette
501
+ 482: cassette player
502
+ 483: castle
503
+ 484: catamaran
504
+ 485: CD player
505
+ 486: cello
506
+ 487: mobile phone
507
+ 488: chain
508
+ 489: chain-link fence
509
+ 490: chain mail
510
+ 491: chainsaw
511
+ 492: chest
512
+ 493: chiffonier
513
+ 494: chime
514
+ 495: china cabinet
515
+ 496: Christmas stocking
516
+ 497: church
517
+ 498: movie theater
518
+ 499: cleaver
519
+ 500: cliff dwelling
520
+ 501: cloak
521
+ 502: clogs
522
+ 503: cocktail shaker
523
+ 504: coffee mug
524
+ 505: coffeemaker
525
+ 506: coil
526
+ 507: combination lock
527
+ 508: computer keyboard
528
+ 509: confectionery store
529
+ 510: container ship
530
+ 511: convertible
531
+ 512: corkscrew
532
+ 513: cornet
533
+ 514: cowboy boot
534
+ 515: cowboy hat
535
+ 516: cradle
536
+ 517: crane (machine)
537
+ 518: crash helmet
538
+ 519: crate
539
+ 520: infant bed
540
+ 521: Crock Pot
541
+ 522: croquet ball
542
+ 523: crutch
543
+ 524: cuirass
544
+ 525: dam
545
+ 526: desk
546
+ 527: desktop computer
547
+ 528: rotary dial telephone
548
+ 529: diaper
549
+ 530: digital clock
550
+ 531: digital watch
551
+ 532: dining table
552
+ 533: dishcloth
553
+ 534: dishwasher
554
+ 535: disc brake
555
+ 536: dock
556
+ 537: dog sled
557
+ 538: dome
558
+ 539: doormat
559
+ 540: drilling rig
560
+ 541: drum
561
+ 542: drumstick
562
+ 543: dumbbell
563
+ 544: Dutch oven
564
+ 545: electric fan
565
+ 546: electric guitar
566
+ 547: electric locomotive
567
+ 548: entertainment center
568
+ 549: envelope
569
+ 550: espresso machine
570
+ 551: face powder
571
+ 552: feather boa
572
+ 553: filing cabinet
573
+ 554: fireboat
574
+ 555: fire engine
575
+ 556: fire screen sheet
576
+ 557: flagpole
577
+ 558: flute
578
+ 559: folding chair
579
+ 560: football helmet
580
+ 561: forklift
581
+ 562: fountain
582
+ 563: fountain pen
583
+ 564: four-poster bed
584
+ 565: freight car
585
+ 566: French horn
586
+ 567: frying pan
587
+ 568: fur coat
588
+ 569: garbage truck
589
+ 570: gas mask
590
+ 571: gas pump
591
+ 572: goblet
592
+ 573: go-kart
593
+ 574: golf ball
594
+ 575: golf cart
595
+ 576: gondola
596
+ 577: gong
597
+ 578: gown
598
+ 579: grand piano
599
+ 580: greenhouse
600
+ 581: grille
601
+ 582: grocery store
602
+ 583: guillotine
603
+ 584: barrette
604
+ 585: hair spray
605
+ 586: half-track
606
+ 587: hammer
607
+ 588: hamper
608
+ 589: hair dryer
609
+ 590: hand-held computer
610
+ 591: handkerchief
611
+ 592: hard disk drive
612
+ 593: harmonica
613
+ 594: harp
614
+ 595: harvester
615
+ 596: hatchet
616
+ 597: holster
617
+ 598: home theater
618
+ 599: honeycomb
619
+ 600: hook
620
+ 601: hoop skirt
621
+ 602: horizontal bar
622
+ 603: horse-drawn vehicle
623
+ 604: hourglass
624
+ 605: iPod
625
+ 606: clothes iron
626
+ 607: jack-o'-lantern
627
+ 608: jeans
628
+ 609: jeep
629
+ 610: T-shirt
630
+ 611: jigsaw puzzle
631
+ 612: pulled rickshaw
632
+ 613: joystick
633
+ 614: kimono
634
+ 615: knee pad
635
+ 616: knot
636
+ 617: lab coat
637
+ 618: ladle
638
+ 619: lampshade
639
+ 620: laptop computer
640
+ 621: lawn mower
641
+ 622: lens cap
642
+ 623: paper knife
643
+ 624: library
644
+ 625: lifeboat
645
+ 626: lighter
646
+ 627: limousine
647
+ 628: ocean liner
648
+ 629: lipstick
649
+ 630: slip-on shoe
650
+ 631: lotion
651
+ 632: speaker
652
+ 633: loupe
653
+ 634: sawmill
654
+ 635: magnetic compass
655
+ 636: mail bag
656
+ 637: mailbox
657
+ 638: tights
658
+ 639: tank suit
659
+ 640: manhole cover
660
+ 641: maraca
661
+ 642: marimba
662
+ 643: mask
663
+ 644: match
664
+ 645: maypole
665
+ 646: maze
666
+ 647: measuring cup
667
+ 648: medicine chest
668
+ 649: megalith
669
+ 650: microphone
670
+ 651: microwave oven
671
+ 652: military uniform
672
+ 653: milk can
673
+ 654: minibus
674
+ 655: miniskirt
675
+ 656: minivan
676
+ 657: missile
677
+ 658: mitten
678
+ 659: mixing bowl
679
+ 660: mobile home
680
+ 661: Model T
681
+ 662: modem
682
+ 663: monastery
683
+ 664: monitor
684
+ 665: moped
685
+ 666: mortar
686
+ 667: square academic cap
687
+ 668: mosque
688
+ 669: mosquito net
689
+ 670: scooter
690
+ 671: mountain bike
691
+ 672: tent
692
+ 673: computer mouse
693
+ 674: mousetrap
694
+ 675: moving van
695
+ 676: muzzle
696
+ 677: nail
697
+ 678: neck brace
698
+ 679: necklace
699
+ 680: nipple
700
+ 681: notebook computer
701
+ 682: obelisk
702
+ 683: oboe
703
+ 684: ocarina
704
+ 685: odometer
705
+ 686: oil filter
706
+ 687: organ
707
+ 688: oscilloscope
708
+ 689: overskirt
709
+ 690: bullock cart
710
+ 691: oxygen mask
711
+ 692: packet
712
+ 693: paddle
713
+ 694: paddle wheel
714
+ 695: padlock
715
+ 696: paintbrush
716
+ 697: pajamas
717
+ 698: palace
718
+ 699: pan flute
719
+ 700: paper towel
720
+ 701: parachute
721
+ 702: parallel bars
722
+ 703: park bench
723
+ 704: parking meter
724
+ 705: passenger car
725
+ 706: patio
726
+ 707: payphone
727
+ 708: pedestal
728
+ 709: pencil case
729
+ 710: pencil sharpener
730
+ 711: perfume
731
+ 712: Petri dish
732
+ 713: photocopier
733
+ 714: plectrum
734
+ 715: Pickelhaube
735
+ 716: picket fence
736
+ 717: pickup truck
737
+ 718: pier
738
+ 719: piggy bank
739
+ 720: pill bottle
740
+ 721: pillow
741
+ 722: ping-pong ball
742
+ 723: pinwheel
743
+ 724: pirate ship
744
+ 725: pitcher
745
+ 726: hand plane
746
+ 727: planetarium
747
+ 728: plastic bag
748
+ 729: plate rack
749
+ 730: plow
750
+ 731: plunger
751
+ 732: Polaroid camera
752
+ 733: pole
753
+ 734: police van
754
+ 735: poncho
755
+ 736: billiard table
756
+ 737: soda bottle
757
+ 738: pot
758
+ 739: potter's wheel
759
+ 740: power drill
760
+ 741: prayer rug
761
+ 742: printer
762
+ 743: prison
763
+ 744: projectile
764
+ 745: projector
765
+ 746: hockey puck
766
+ 747: punching bag
767
+ 748: purse
768
+ 749: quill
769
+ 750: quilt
770
+ 751: race car
771
+ 752: racket
772
+ 753: radiator
773
+ 754: radio
774
+ 755: radio telescope
775
+ 756: rain barrel
776
+ 757: recreational vehicle
777
+ 758: reel
778
+ 759: reflex camera
779
+ 760: refrigerator
780
+ 761: remote control
781
+ 762: restaurant
782
+ 763: revolver
783
+ 764: rifle
784
+ 765: rocking chair
785
+ 766: rotisserie
786
+ 767: eraser
787
+ 768: rugby ball
788
+ 769: ruler
789
+ 770: running shoe
790
+ 771: safe
791
+ 772: safety pin
792
+ 773: salt shaker
793
+ 774: sandal
794
+ 775: sarong
795
+ 776: saxophone
796
+ 777: scabbard
797
+ 778: weighing scale
798
+ 779: school bus
799
+ 780: schooner
800
+ 781: scoreboard
801
+ 782: CRT screen
802
+ 783: screw
803
+ 784: screwdriver
804
+ 785: seat belt
805
+ 786: sewing machine
806
+ 787: shield
807
+ 788: shoe store
808
+ 789: shoji
809
+ 790: shopping basket
810
+ 791: shopping cart
811
+ 792: shovel
812
+ 793: shower cap
813
+ 794: shower curtain
814
+ 795: ski
815
+ 796: ski mask
816
+ 797: sleeping bag
817
+ 798: slide rule
818
+ 799: sliding door
819
+ 800: slot machine
820
+ 801: snorkel
821
+ 802: snowmobile
822
+ 803: snowplow
823
+ 804: soap dispenser
824
+ 805: soccer ball
825
+ 806: sock
826
+ 807: solar thermal collector
827
+ 808: sombrero
828
+ 809: soup bowl
829
+ 810: space bar
830
+ 811: space heater
831
+ 812: space shuttle
832
+ 813: spatula
833
+ 814: motorboat
834
+ 815: spider web
835
+ 816: spindle
836
+ 817: sports car
837
+ 818: spotlight
838
+ 819: stage
839
+ 820: steam locomotive
840
+ 821: through arch bridge
841
+ 822: steel drum
842
+ 823: stethoscope
843
+ 824: scarf
844
+ 825: stone wall
845
+ 826: stopwatch
846
+ 827: stove
847
+ 828: strainer
848
+ 829: tram
849
+ 830: stretcher
850
+ 831: couch
851
+ 832: stupa
852
+ 833: submarine
853
+ 834: suit
854
+ 835: sundial
855
+ 836: sunglass
856
+ 837: sunglasses
857
+ 838: sunscreen
858
+ 839: suspension bridge
859
+ 840: mop
860
+ 841: sweatshirt
861
+ 842: swimsuit
862
+ 843: swing
863
+ 844: switch
864
+ 845: syringe
865
+ 846: table lamp
866
+ 847: tank
867
+ 848: tape player
868
+ 849: teapot
869
+ 850: teddy bear
870
+ 851: television
871
+ 852: tennis ball
872
+ 853: thatched roof
873
+ 854: front curtain
874
+ 855: thimble
875
+ 856: threshing machine
876
+ 857: throne
877
+ 858: tile roof
878
+ 859: toaster
879
+ 860: tobacco shop
880
+ 861: toilet seat
881
+ 862: torch
882
+ 863: totem pole
883
+ 864: tow truck
884
+ 865: toy store
885
+ 866: tractor
886
+ 867: semi-trailer truck
887
+ 868: tray
888
+ 869: trench coat
889
+ 870: tricycle
890
+ 871: trimaran
891
+ 872: tripod
892
+ 873: triumphal arch
893
+ 874: trolleybus
894
+ 875: trombone
895
+ 876: tub
896
+ 877: turnstile
897
+ 878: typewriter keyboard
898
+ 879: umbrella
899
+ 880: unicycle
900
+ 881: upright piano
901
+ 882: vacuum cleaner
902
+ 883: vase
903
+ 884: vault
904
+ 885: velvet
905
+ 886: vending machine
906
+ 887: vestment
907
+ 888: viaduct
908
+ 889: violin
909
+ 890: volleyball
910
+ 891: waffle iron
911
+ 892: wall clock
912
+ 893: wallet
913
+ 894: wardrobe
914
+ 895: military aircraft
915
+ 896: sink
916
+ 897: washing machine
917
+ 898: water bottle
918
+ 899: water jug
919
+ 900: water tower
920
+ 901: whiskey jug
921
+ 902: whistle
922
+ 903: wig
923
+ 904: window screen
924
+ 905: window shade
925
+ 906: Windsor tie
926
+ 907: wine bottle
927
+ 908: wing
928
+ 909: wok
929
+ 910: wooden spoon
930
+ 911: wool
931
+ 912: split-rail fence
932
+ 913: shipwreck
933
+ 914: yawl
934
+ 915: yurt
935
+ 916: website
936
+ 917: comic book
937
+ 918: crossword
938
+ 919: traffic sign
939
+ 920: traffic light
940
+ 921: dust jacket
941
+ 922: menu
942
+ 923: plate
943
+ 924: guacamole
944
+ 925: consomme
945
+ 926: hot pot
946
+ 927: trifle
947
+ 928: ice cream
948
+ 929: ice pop
949
+ 930: baguette
950
+ 931: bagel
951
+ 932: pretzel
952
+ 933: cheeseburger
953
+ 934: hot dog
954
+ 935: mashed potato
955
+ 936: cabbage
956
+ 937: broccoli
957
+ 938: cauliflower
958
+ 939: zucchini
959
+ 940: spaghetti squash
960
+ 941: acorn squash
961
+ 942: butternut squash
962
+ 943: cucumber
963
+ 944: artichoke
964
+ 945: bell pepper
965
+ 946: cardoon
966
+ 947: mushroom
967
+ 948: Granny Smith
968
+ 949: strawberry
969
+ 950: orange
970
+ 951: lemon
971
+ 952: fig
972
+ 953: pineapple
973
+ 954: banana
974
+ 955: jackfruit
975
+ 956: custard apple
976
+ 957: pomegranate
977
+ 958: hay
978
+ 959: carbonara
979
+ 960: chocolate syrup
980
+ 961: dough
981
+ 962: meatloaf
982
+ 963: pizza
983
+ 964: pot pie
984
+ 965: burrito
985
+ 966: red wine
986
+ 967: espresso
987
+ 968: cup
988
+ 969: eggnog
989
+ 970: alp
990
+ 971: bubble
991
+ 972: cliff
992
+ 973: coral reef
993
+ 974: geyser
994
+ 975: lakeshore
995
+ 976: promontory
996
+ 977: shoal
997
+ 978: seashore
998
+ 979: valley
999
+ 980: volcano
1000
+ 981: baseball player
1001
+ 982: bridegroom
1002
+ 983: scuba diver
1003
+ 984: rapeseed
1004
+ 985: daisy
1005
+ 986: yellow lady's slipper
1006
+ 987: corn
1007
+ 988: acorn
1008
+ 989: rose hip
1009
+ 990: horse chestnut seed
1010
+ 991: coral fungus
1011
+ 992: agaric
1012
+ 993: gyromitra
1013
+ 994: stinkhorn mushroom
1014
+ 995: earth star
1015
+ 996: hen-of-the-woods
1016
+ 997: bolete
1017
+ 998: ear
1018
+ 999: toilet paper
1019
+
1020
+ # Imagenet class codes to human-readable names
1021
+ map:
1022
+ n01440764: tench
1023
+ n01443537: goldfish
1024
+ n01484850: great_white_shark
1025
+ n01491361: tiger_shark
1026
+ n01494475: hammerhead
1027
+ n01496331: electric_ray
1028
+ n01498041: stingray
1029
+ n01514668: cock
1030
+ n01514859: hen
1031
+ n01518878: ostrich
1032
+ n01530575: brambling
1033
+ n01531178: goldfinch
1034
+ n01532829: house_finch
1035
+ n01534433: junco
1036
+ n01537544: indigo_bunting
1037
+ n01558993: robin
1038
+ n01560419: bulbul
1039
+ n01580077: jay
1040
+ n01582220: magpie
1041
+ n01592084: chickadee
1042
+ n01601694: water_ouzel
1043
+ n01608432: kite
1044
+ n01614925: bald_eagle
1045
+ n01616318: vulture
1046
+ n01622779: great_grey_owl
1047
+ n01629819: European_fire_salamander
1048
+ n01630670: common_newt
1049
+ n01631663: eft
1050
+ n01632458: spotted_salamander
1051
+ n01632777: axolotl
1052
+ n01641577: bullfrog
1053
+ n01644373: tree_frog
1054
+ n01644900: tailed_frog
1055
+ n01664065: loggerhead
1056
+ n01665541: leatherback_turtle
1057
+ n01667114: mud_turtle
1058
+ n01667778: terrapin
1059
+ n01669191: box_turtle
1060
+ n01675722: banded_gecko
1061
+ n01677366: common_iguana
1062
+ n01682714: American_chameleon
1063
+ n01685808: whiptail
1064
+ n01687978: agama
1065
+ n01688243: frilled_lizard
1066
+ n01689811: alligator_lizard
1067
+ n01692333: Gila_monster
1068
+ n01693334: green_lizard
1069
+ n01694178: African_chameleon
1070
+ n01695060: Komodo_dragon
1071
+ n01697457: African_crocodile
1072
+ n01698640: American_alligator
1073
+ n01704323: triceratops
1074
+ n01728572: thunder_snake
1075
+ n01728920: ringneck_snake
1076
+ n01729322: hognose_snake
1077
+ n01729977: green_snake
1078
+ n01734418: king_snake
1079
+ n01735189: garter_snake
1080
+ n01737021: water_snake
1081
+ n01739381: vine_snake
1082
+ n01740131: night_snake
1083
+ n01742172: boa_constrictor
1084
+ n01744401: rock_python
1085
+ n01748264: Indian_cobra
1086
+ n01749939: green_mamba
1087
+ n01751748: sea_snake
1088
+ n01753488: horned_viper
1089
+ n01755581: diamondback
1090
+ n01756291: sidewinder
1091
+ n01768244: trilobite
1092
+ n01770081: harvestman
1093
+ n01770393: scorpion
1094
+ n01773157: black_and_gold_garden_spider
1095
+ n01773549: barn_spider
1096
+ n01773797: garden_spider
1097
+ n01774384: black_widow
1098
+ n01774750: tarantula
1099
+ n01775062: wolf_spider
1100
+ n01776313: tick
1101
+ n01784675: centipede
1102
+ n01795545: black_grouse
1103
+ n01796340: ptarmigan
1104
+ n01797886: ruffed_grouse
1105
+ n01798484: prairie_chicken
1106
+ n01806143: peacock
1107
+ n01806567: quail
1108
+ n01807496: partridge
1109
+ n01817953: African_grey
1110
+ n01818515: macaw
1111
+ n01819313: sulphur-crested_cockatoo
1112
+ n01820546: lorikeet
1113
+ n01824575: coucal
1114
+ n01828970: bee_eater
1115
+ n01829413: hornbill
1116
+ n01833805: hummingbird
1117
+ n01843065: jacamar
1118
+ n01843383: toucan
1119
+ n01847000: drake
1120
+ n01855032: red-breasted_merganser
1121
+ n01855672: goose
1122
+ n01860187: black_swan
1123
+ n01871265: tusker
1124
+ n01872401: echidna
1125
+ n01873310: platypus
1126
+ n01877812: wallaby
1127
+ n01882714: koala
1128
+ n01883070: wombat
1129
+ n01910747: jellyfish
1130
+ n01914609: sea_anemone
1131
+ n01917289: brain_coral
1132
+ n01924916: flatworm
1133
+ n01930112: nematode
1134
+ n01943899: conch
1135
+ n01944390: snail
1136
+ n01945685: slug
1137
+ n01950731: sea_slug
1138
+ n01955084: chiton
1139
+ n01968897: chambered_nautilus
1140
+ n01978287: Dungeness_crab
1141
+ n01978455: rock_crab
1142
+ n01980166: fiddler_crab
1143
+ n01981276: king_crab
1144
+ n01983481: American_lobster
1145
+ n01984695: spiny_lobster
1146
+ n01985128: crayfish
1147
+ n01986214: hermit_crab
1148
+ n01990800: isopod
1149
+ n02002556: white_stork
1150
+ n02002724: black_stork
1151
+ n02006656: spoonbill
1152
+ n02007558: flamingo
1153
+ n02009229: little_blue_heron
1154
+ n02009912: American_egret
1155
+ n02011460: bittern
1156
+ n02012849: crane_(bird)
1157
+ n02013706: limpkin
1158
+ n02017213: European_gallinule
1159
+ n02018207: American_coot
1160
+ n02018795: bustard
1161
+ n02025239: ruddy_turnstone
1162
+ n02027492: red-backed_sandpiper
1163
+ n02028035: redshank
1164
+ n02033041: dowitcher
1165
+ n02037110: oystercatcher
1166
+ n02051845: pelican
1167
+ n02056570: king_penguin
1168
+ n02058221: albatross
1169
+ n02066245: grey_whale
1170
+ n02071294: killer_whale
1171
+ n02074367: dugong
1172
+ n02077923: sea_lion
1173
+ n02085620: Chihuahua
1174
+ n02085782: Japanese_spaniel
1175
+ n02085936: Maltese_dog
1176
+ n02086079: Pekinese
1177
+ n02086240: Shih-Tzu
1178
+ n02086646: Blenheim_spaniel
1179
+ n02086910: papillon
1180
+ n02087046: toy_terrier
1181
+ n02087394: Rhodesian_ridgeback
1182
+ n02088094: Afghan_hound
1183
+ n02088238: basset
1184
+ n02088364: beagle
1185
+ n02088466: bloodhound
1186
+ n02088632: bluetick
1187
+ n02089078: black-and-tan_coonhound
1188
+ n02089867: Walker_hound
1189
+ n02089973: English_foxhound
1190
+ n02090379: redbone
1191
+ n02090622: borzoi
1192
+ n02090721: Irish_wolfhound
1193
+ n02091032: Italian_greyhound
1194
+ n02091134: whippet
1195
+ n02091244: Ibizan_hound
1196
+ n02091467: Norwegian_elkhound
1197
+ n02091635: otterhound
1198
+ n02091831: Saluki
1199
+ n02092002: Scottish_deerhound
1200
+ n02092339: Weimaraner
1201
+ n02093256: Staffordshire_bullterrier
1202
+ n02093428: American_Staffordshire_terrier
1203
+ n02093647: Bedlington_terrier
1204
+ n02093754: Border_terrier
1205
+ n02093859: Kerry_blue_terrier
1206
+ n02093991: Irish_terrier
1207
+ n02094114: Norfolk_terrier
1208
+ n02094258: Norwich_terrier
1209
+ n02094433: Yorkshire_terrier
1210
+ n02095314: wire-haired_fox_terrier
1211
+ n02095570: Lakeland_terrier
1212
+ n02095889: Sealyham_terrier
1213
+ n02096051: Airedale
1214
+ n02096177: cairn
1215
+ n02096294: Australian_terrier
1216
+ n02096437: Dandie_Dinmont
1217
+ n02096585: Boston_bull
1218
+ n02097047: miniature_schnauzer
1219
+ n02097130: giant_schnauzer
1220
+ n02097209: standard_schnauzer
1221
+ n02097298: Scotch_terrier
1222
+ n02097474: Tibetan_terrier
1223
+ n02097658: silky_terrier
1224
+ n02098105: soft-coated_wheaten_terrier
1225
+ n02098286: West_Highland_white_terrier
1226
+ n02098413: Lhasa
1227
+ n02099267: flat-coated_retriever
1228
+ n02099429: curly-coated_retriever
1229
+ n02099601: golden_retriever
1230
+ n02099712: Labrador_retriever
1231
+ n02099849: Chesapeake_Bay_retriever
1232
+ n02100236: German_short-haired_pointer
1233
+ n02100583: vizsla
1234
+ n02100735: English_setter
1235
+ n02100877: Irish_setter
1236
+ n02101006: Gordon_setter
1237
+ n02101388: Brittany_spaniel
1238
+ n02101556: clumber
1239
+ n02102040: English_springer
1240
+ n02102177: Welsh_springer_spaniel
1241
+ n02102318: cocker_spaniel
1242
+ n02102480: Sussex_spaniel
1243
+ n02102973: Irish_water_spaniel
1244
+ n02104029: kuvasz
1245
+ n02104365: schipperke
1246
+ n02105056: groenendael
1247
+ n02105162: malinois
1248
+ n02105251: briard
1249
+ n02105412: kelpie
1250
+ n02105505: komondor
1251
+ n02105641: Old_English_sheepdog
1252
+ n02105855: Shetland_sheepdog
1253
+ n02106030: collie
1254
+ n02106166: Border_collie
1255
+ n02106382: Bouvier_des_Flandres
1256
+ n02106550: Rottweiler
1257
+ n02106662: German_shepherd
1258
+ n02107142: Doberman
1259
+ n02107312: miniature_pinscher
1260
+ n02107574: Greater_Swiss_Mountain_dog
1261
+ n02107683: Bernese_mountain_dog
1262
+ n02107908: Appenzeller
1263
+ n02108000: EntleBucher
1264
+ n02108089: boxer
1265
+ n02108422: bull_mastiff
1266
+ n02108551: Tibetan_mastiff
1267
+ n02108915: French_bulldog
1268
+ n02109047: Great_Dane
1269
+ n02109525: Saint_Bernard
1270
+ n02109961: Eskimo_dog
1271
+ n02110063: malamute
1272
+ n02110185: Siberian_husky
1273
+ n02110341: dalmatian
1274
+ n02110627: affenpinscher
1275
+ n02110806: basenji
1276
+ n02110958: pug
1277
+ n02111129: Leonberg
1278
+ n02111277: Newfoundland
1279
+ n02111500: Great_Pyrenees
1280
+ n02111889: Samoyed
1281
+ n02112018: Pomeranian
1282
+ n02112137: chow
1283
+ n02112350: keeshond
1284
+ n02112706: Brabancon_griffon
1285
+ n02113023: Pembroke
1286
+ n02113186: Cardigan
1287
+ n02113624: toy_poodle
1288
+ n02113712: miniature_poodle
1289
+ n02113799: standard_poodle
1290
+ n02113978: Mexican_hairless
1291
+ n02114367: timber_wolf
1292
+ n02114548: white_wolf
1293
+ n02114712: red_wolf
1294
+ n02114855: coyote
1295
+ n02115641: dingo
1296
+ n02115913: dhole
1297
+ n02116738: African_hunting_dog
1298
+ n02117135: hyena
1299
+ n02119022: red_fox
1300
+ n02119789: kit_fox
1301
+ n02120079: Arctic_fox
1302
+ n02120505: grey_fox
1303
+ n02123045: tabby
1304
+ n02123159: tiger_cat
1305
+ n02123394: Persian_cat
1306
+ n02123597: Siamese_cat
1307
+ n02124075: Egyptian_cat
1308
+ n02125311: cougar
1309
+ n02127052: lynx
1310
+ n02128385: leopard
1311
+ n02128757: snow_leopard
1312
+ n02128925: jaguar
1313
+ n02129165: lion
1314
+ n02129604: tiger
1315
+ n02130308: cheetah
1316
+ n02132136: brown_bear
1317
+ n02133161: American_black_bear
1318
+ n02134084: ice_bear
1319
+ n02134418: sloth_bear
1320
+ n02137549: mongoose
1321
+ n02138441: meerkat
1322
+ n02165105: tiger_beetle
1323
+ n02165456: ladybug
1324
+ n02167151: ground_beetle
1325
+ n02168699: long-horned_beetle
1326
+ n02169497: leaf_beetle
1327
+ n02172182: dung_beetle
1328
+ n02174001: rhinoceros_beetle
1329
+ n02177972: weevil
1330
+ n02190166: fly
1331
+ n02206856: bee
1332
+ n02219486: ant
1333
+ n02226429: grasshopper
1334
+ n02229544: cricket
1335
+ n02231487: walking_stick
1336
+ n02233338: cockroach
1337
+ n02236044: mantis
1338
+ n02256656: cicada
1339
+ n02259212: leafhopper
1340
+ n02264363: lacewing
1341
+ n02268443: dragonfly
1342
+ n02268853: damselfly
1343
+ n02276258: admiral
1344
+ n02277742: ringlet
1345
+ n02279972: monarch
1346
+ n02280649: cabbage_butterfly
1347
+ n02281406: sulphur_butterfly
1348
+ n02281787: lycaenid
1349
+ n02317335: starfish
1350
+ n02319095: sea_urchin
1351
+ n02321529: sea_cucumber
1352
+ n02325366: wood_rabbit
1353
+ n02326432: hare
1354
+ n02328150: Angora
1355
+ n02342885: hamster
1356
+ n02346627: porcupine
1357
+ n02356798: fox_squirrel
1358
+ n02361337: marmot
1359
+ n02363005: beaver
1360
+ n02364673: guinea_pig
1361
+ n02389026: sorrel
1362
+ n02391049: zebra
1363
+ n02395406: hog
1364
+ n02396427: wild_boar
1365
+ n02397096: warthog
1366
+ n02398521: hippopotamus
1367
+ n02403003: ox
1368
+ n02408429: water_buffalo
1369
+ n02410509: bison
1370
+ n02412080: ram
1371
+ n02415577: bighorn
1372
+ n02417914: ibex
1373
+ n02422106: hartebeest
1374
+ n02422699: impala
1375
+ n02423022: gazelle
1376
+ n02437312: Arabian_camel
1377
+ n02437616: llama
1378
+ n02441942: weasel
1379
+ n02442845: mink
1380
+ n02443114: polecat
1381
+ n02443484: black-footed_ferret
1382
+ n02444819: otter
1383
+ n02445715: skunk
1384
+ n02447366: badger
1385
+ n02454379: armadillo
1386
+ n02457408: three-toed_sloth
1387
+ n02480495: orangutan
1388
+ n02480855: gorilla
1389
+ n02481823: chimpanzee
1390
+ n02483362: gibbon
1391
+ n02483708: siamang
1392
+ n02484975: guenon
1393
+ n02486261: patas
1394
+ n02486410: baboon
1395
+ n02487347: macaque
1396
+ n02488291: langur
1397
+ n02488702: colobus
1398
+ n02489166: proboscis_monkey
1399
+ n02490219: marmoset
1400
+ n02492035: capuchin
1401
+ n02492660: howler_monkey
1402
+ n02493509: titi
1403
+ n02493793: spider_monkey
1404
+ n02494079: squirrel_monkey
1405
+ n02497673: Madagascar_cat
1406
+ n02500267: indri
1407
+ n02504013: Indian_elephant
1408
+ n02504458: African_elephant
1409
+ n02509815: lesser_panda
1410
+ n02510455: giant_panda
1411
+ n02514041: barracouta
1412
+ n02526121: eel
1413
+ n02536864: coho
1414
+ n02606052: rock_beauty
1415
+ n02607072: anemone_fish
1416
+ n02640242: sturgeon
1417
+ n02641379: gar
1418
+ n02643566: lionfish
1419
+ n02655020: puffer
1420
+ n02666196: abacus
1421
+ n02667093: abaya
1422
+ n02669723: academic_gown
1423
+ n02672831: accordion
1424
+ n02676566: acoustic_guitar
1425
+ n02687172: aircraft_carrier
1426
+ n02690373: airliner
1427
+ n02692877: airship
1428
+ n02699494: altar
1429
+ n02701002: ambulance
1430
+ n02704792: amphibian
1431
+ n02708093: analog_clock
1432
+ n02727426: apiary
1433
+ n02730930: apron
1434
+ n02747177: ashcan
1435
+ n02749479: assault_rifle
1436
+ n02769748: backpack
1437
+ n02776631: bakery
1438
+ n02777292: balance_beam
1439
+ n02782093: balloon
1440
+ n02783161: ballpoint
1441
+ n02786058: Band_Aid
1442
+ n02787622: banjo
1443
+ n02788148: bannister
1444
+ n02790996: barbell
1445
+ n02791124: barber_chair
1446
+ n02791270: barbershop
1447
+ n02793495: barn
1448
+ n02794156: barometer
1449
+ n02795169: barrel
1450
+ n02797295: barrow
1451
+ n02799071: baseball
1452
+ n02802426: basketball
1453
+ n02804414: bassinet
1454
+ n02804610: bassoon
1455
+ n02807133: bathing_cap
1456
+ n02808304: bath_towel
1457
+ n02808440: bathtub
1458
+ n02814533: beach_wagon
1459
+ n02814860: beacon
1460
+ n02815834: beaker
1461
+ n02817516: bearskin
1462
+ n02823428: beer_bottle
1463
+ n02823750: beer_glass
1464
+ n02825657: bell_cote
1465
+ n02834397: bib
1466
+ n02835271: bicycle-built-for-two
1467
+ n02837789: bikini
1468
+ n02840245: binder
1469
+ n02841315: binoculars
1470
+ n02843684: birdhouse
1471
+ n02859443: boathouse
1472
+ n02860847: bobsled
1473
+ n02865351: bolo_tie
1474
+ n02869837: bonnet
1475
+ n02870880: bookcase
1476
+ n02871525: bookshop
1477
+ n02877765: bottlecap
1478
+ n02879718: bow
1479
+ n02883205: bow_tie
1480
+ n02892201: brass
1481
+ n02892767: brassiere
1482
+ n02894605: breakwater
1483
+ n02895154: breastplate
1484
+ n02906734: broom
1485
+ n02909870: bucket
1486
+ n02910353: buckle
1487
+ n02916936: bulletproof_vest
1488
+ n02917067: bullet_train
1489
+ n02927161: butcher_shop
1490
+ n02930766: cab
1491
+ n02939185: caldron
1492
+ n02948072: candle
1493
+ n02950826: cannon
1494
+ n02951358: canoe
1495
+ n02951585: can_opener
1496
+ n02963159: cardigan
1497
+ n02965783: car_mirror
1498
+ n02966193: carousel
1499
+ n02966687: carpenter's_kit
1500
+ n02971356: carton
1501
+ n02974003: car_wheel
1502
+ n02977058: cash_machine
1503
+ n02978881: cassette
1504
+ n02979186: cassette_player
1505
+ n02980441: castle
1506
+ n02981792: catamaran
1507
+ n02988304: CD_player
1508
+ n02992211: cello
1509
+ n02992529: cellular_telephone
1510
+ n02999410: chain
1511
+ n03000134: chainlink_fence
1512
+ n03000247: chain_mail
1513
+ n03000684: chain_saw
1514
+ n03014705: chest
1515
+ n03016953: chiffonier
1516
+ n03017168: chime
1517
+ n03018349: china_cabinet
1518
+ n03026506: Christmas_stocking
1519
+ n03028079: church
1520
+ n03032252: cinema
1521
+ n03041632: cleaver
1522
+ n03042490: cliff_dwelling
1523
+ n03045698: cloak
1524
+ n03047690: clog
1525
+ n03062245: cocktail_shaker
1526
+ n03063599: coffee_mug
1527
+ n03063689: coffeepot
1528
+ n03065424: coil
1529
+ n03075370: combination_lock
1530
+ n03085013: computer_keyboard
1531
+ n03089624: confectionery
1532
+ n03095699: container_ship
1533
+ n03100240: convertible
1534
+ n03109150: corkscrew
1535
+ n03110669: cornet
1536
+ n03124043: cowboy_boot
1537
+ n03124170: cowboy_hat
1538
+ n03125729: cradle
1539
+ n03126707: crane_(machine)
1540
+ n03127747: crash_helmet
1541
+ n03127925: crate
1542
+ n03131574: crib
1543
+ n03133878: Crock_Pot
1544
+ n03134739: croquet_ball
1545
+ n03141823: crutch
1546
+ n03146219: cuirass
1547
+ n03160309: dam
1548
+ n03179701: desk
1549
+ n03180011: desktop_computer
1550
+ n03187595: dial_telephone
1551
+ n03188531: diaper
1552
+ n03196217: digital_clock
1553
+ n03197337: digital_watch
1554
+ n03201208: dining_table
1555
+ n03207743: dishrag
1556
+ n03207941: dishwasher
1557
+ n03208938: disk_brake
1558
+ n03216828: dock
1559
+ n03218198: dogsled
1560
+ n03220513: dome
1561
+ n03223299: doormat
1562
+ n03240683: drilling_platform
1563
+ n03249569: drum
1564
+ n03250847: drumstick
1565
+ n03255030: dumbbell
1566
+ n03259280: Dutch_oven
1567
+ n03271574: electric_fan
1568
+ n03272010: electric_guitar
1569
+ n03272562: electric_locomotive
1570
+ n03290653: entertainment_center
1571
+ n03291819: envelope
1572
+ n03297495: espresso_maker
1573
+ n03314780: face_powder
1574
+ n03325584: feather_boa
1575
+ n03337140: file
1576
+ n03344393: fireboat
1577
+ n03345487: fire_engine
1578
+ n03347037: fire_screen
1579
+ n03355925: flagpole
1580
+ n03372029: flute
1581
+ n03376595: folding_chair
1582
+ n03379051: football_helmet
1583
+ n03384352: forklift
1584
+ n03388043: fountain
1585
+ n03388183: fountain_pen
1586
+ n03388549: four-poster
1587
+ n03393912: freight_car
1588
+ n03394916: French_horn
1589
+ n03400231: frying_pan
1590
+ n03404251: fur_coat
1591
+ n03417042: garbage_truck
1592
+ n03424325: gasmask
1593
+ n03425413: gas_pump
1594
+ n03443371: goblet
1595
+ n03444034: go-kart
1596
+ n03445777: golf_ball
1597
+ n03445924: golfcart
1598
+ n03447447: gondola
1599
+ n03447721: gong
1600
+ n03450230: gown
1601
+ n03452741: grand_piano
1602
+ n03457902: greenhouse
1603
+ n03459775: grille
1604
+ n03461385: grocery_store
1605
+ n03467068: guillotine
1606
+ n03476684: hair_slide
1607
+ n03476991: hair_spray
1608
+ n03478589: half_track
1609
+ n03481172: hammer
1610
+ n03482405: hamper
1611
+ n03483316: hand_blower
1612
+ n03485407: hand-held_computer
1613
+ n03485794: handkerchief
1614
+ n03492542: hard_disc
1615
+ n03494278: harmonica
1616
+ n03495258: harp
1617
+ n03496892: harvester
1618
+ n03498962: hatchet
1619
+ n03527444: holster
1620
+ n03529860: home_theater
1621
+ n03530642: honeycomb
1622
+ n03532672: hook
1623
+ n03534580: hoopskirt
1624
+ n03535780: horizontal_bar
1625
+ n03538406: horse_cart
1626
+ n03544143: hourglass
1627
+ n03584254: iPod
1628
+ n03584829: iron
1629
+ n03590841: jack-o'-lantern
1630
+ n03594734: jean
1631
+ n03594945: jeep
1632
+ n03595614: jersey
1633
+ n03598930: jigsaw_puzzle
1634
+ n03599486: jinrikisha
1635
+ n03602883: joystick
1636
+ n03617480: kimono
1637
+ n03623198: knee_pad
1638
+ n03627232: knot
1639
+ n03630383: lab_coat
1640
+ n03633091: ladle
1641
+ n03637318: lampshade
1642
+ n03642806: laptop
1643
+ n03649909: lawn_mower
1644
+ n03657121: lens_cap
1645
+ n03658185: letter_opener
1646
+ n03661043: library
1647
+ n03662601: lifeboat
1648
+ n03666591: lighter
1649
+ n03670208: limousine
1650
+ n03673027: liner
1651
+ n03676483: lipstick
1652
+ n03680355: Loafer
1653
+ n03690938: lotion
1654
+ n03691459: loudspeaker
1655
+ n03692522: loupe
1656
+ n03697007: lumbermill
1657
+ n03706229: magnetic_compass
1658
+ n03709823: mailbag
1659
+ n03710193: mailbox
1660
+ n03710637: maillot_(tights)
1661
+ n03710721: maillot_(tank_suit)
1662
+ n03717622: manhole_cover
1663
+ n03720891: maraca
1664
+ n03721384: marimba
1665
+ n03724870: mask
1666
+ n03729826: matchstick
1667
+ n03733131: maypole
1668
+ n03733281: maze
1669
+ n03733805: measuring_cup
1670
+ n03742115: medicine_chest
1671
+ n03743016: megalith
1672
+ n03759954: microphone
1673
+ n03761084: microwave
1674
+ n03763968: military_uniform
1675
+ n03764736: milk_can
1676
+ n03769881: minibus
1677
+ n03770439: miniskirt
1678
+ n03770679: minivan
1679
+ n03773504: missile
1680
+ n03775071: mitten
1681
+ n03775546: mixing_bowl
1682
+ n03776460: mobile_home
1683
+ n03777568: Model_T
1684
+ n03777754: modem
1685
+ n03781244: monastery
1686
+ n03782006: monitor
1687
+ n03785016: moped
1688
+ n03786901: mortar
1689
+ n03787032: mortarboard
1690
+ n03788195: mosque
1691
+ n03788365: mosquito_net
1692
+ n03791053: motor_scooter
1693
+ n03792782: mountain_bike
1694
+ n03792972: mountain_tent
1695
+ n03793489: mouse
1696
+ n03794056: mousetrap
1697
+ n03796401: moving_van
1698
+ n03803284: muzzle
1699
+ n03804744: nail
1700
+ n03814639: neck_brace
1701
+ n03814906: necklace
1702
+ n03825788: nipple
1703
+ n03832673: notebook
1704
+ n03837869: obelisk
1705
+ n03838899: oboe
1706
+ n03840681: ocarina
1707
+ n03841143: odometer
1708
+ n03843555: oil_filter
1709
+ n03854065: organ
1710
+ n03857828: oscilloscope
1711
+ n03866082: overskirt
1712
+ n03868242: oxcart
1713
+ n03868863: oxygen_mask
1714
+ n03871628: packet
1715
+ n03873416: paddle
1716
+ n03874293: paddlewheel
1717
+ n03874599: padlock
1718
+ n03876231: paintbrush
1719
+ n03877472: pajama
1720
+ n03877845: palace
1721
+ n03884397: panpipe
1722
+ n03887697: paper_towel
1723
+ n03888257: parachute
1724
+ n03888605: parallel_bars
1725
+ n03891251: park_bench
1726
+ n03891332: parking_meter
1727
+ n03895866: passenger_car
1728
+ n03899768: patio
1729
+ n03902125: pay-phone
1730
+ n03903868: pedestal
1731
+ n03908618: pencil_box
1732
+ n03908714: pencil_sharpener
1733
+ n03916031: perfume
1734
+ n03920288: Petri_dish
1735
+ n03924679: photocopier
1736
+ n03929660: pick
1737
+ n03929855: pickelhaube
1738
+ n03930313: picket_fence
1739
+ n03930630: pickup
1740
+ n03933933: pier
1741
+ n03935335: piggy_bank
1742
+ n03937543: pill_bottle
1743
+ n03938244: pillow
1744
+ n03942813: ping-pong_ball
1745
+ n03944341: pinwheel
1746
+ n03947888: pirate
1747
+ n03950228: pitcher
1748
+ n03954731: plane
1749
+ n03956157: planetarium
1750
+ n03958227: plastic_bag
1751
+ n03961711: plate_rack
1752
+ n03967562: plow
1753
+ n03970156: plunger
1754
+ n03976467: Polaroid_camera
1755
+ n03976657: pole
1756
+ n03977966: police_van
1757
+ n03980874: poncho
1758
+ n03982430: pool_table
1759
+ n03983396: pop_bottle
1760
+ n03991062: pot
1761
+ n03992509: potter's_wheel
1762
+ n03995372: power_drill
1763
+ n03998194: prayer_rug
1764
+ n04004767: printer
1765
+ n04005630: prison
1766
+ n04008634: projectile
1767
+ n04009552: projector
1768
+ n04019541: puck
1769
+ n04023962: punching_bag
1770
+ n04026417: purse
1771
+ n04033901: quill
1772
+ n04033995: quilt
1773
+ n04037443: racer
1774
+ n04039381: racket
1775
+ n04040759: radiator
1776
+ n04041544: radio
1777
+ n04044716: radio_telescope
1778
+ n04049303: rain_barrel
1779
+ n04065272: recreational_vehicle
1780
+ n04067472: reel
1781
+ n04069434: reflex_camera
1782
+ n04070727: refrigerator
1783
+ n04074963: remote_control
1784
+ n04081281: restaurant
1785
+ n04086273: revolver
1786
+ n04090263: rifle
1787
+ n04099969: rocking_chair
1788
+ n04111531: rotisserie
1789
+ n04116512: rubber_eraser
1790
+ n04118538: rugby_ball
1791
+ n04118776: rule
1792
+ n04120489: running_shoe
1793
+ n04125021: safe
1794
+ n04127249: safety_pin
1795
+ n04131690: saltshaker
1796
+ n04133789: sandal
1797
+ n04136333: sarong
1798
+ n04141076: sax
1799
+ n04141327: scabbard
1800
+ n04141975: scale
1801
+ n04146614: school_bus
1802
+ n04147183: schooner
1803
+ n04149813: scoreboard
1804
+ n04152593: screen
1805
+ n04153751: screw
1806
+ n04154565: screwdriver
1807
+ n04162706: seat_belt
1808
+ n04179913: sewing_machine
1809
+ n04192698: shield
1810
+ n04200800: shoe_shop
1811
+ n04201297: shoji
1812
+ n04204238: shopping_basket
1813
+ n04204347: shopping_cart
1814
+ n04208210: shovel
1815
+ n04209133: shower_cap
1816
+ n04209239: shower_curtain
1817
+ n04228054: ski
1818
+ n04229816: ski_mask
1819
+ n04235860: sleeping_bag
1820
+ n04238763: slide_rule
1821
+ n04239074: sliding_door
1822
+ n04243546: slot
1823
+ n04251144: snorkel
1824
+ n04252077: snowmobile
1825
+ n04252225: snowplow
1826
+ n04254120: soap_dispenser
1827
+ n04254680: soccer_ball
1828
+ n04254777: sock
1829
+ n04258138: solar_dish
1830
+ n04259630: sombrero
1831
+ n04263257: soup_bowl
1832
+ n04264628: space_bar
1833
+ n04265275: space_heater
1834
+ n04266014: space_shuttle
1835
+ n04270147: spatula
1836
+ n04273569: speedboat
1837
+ n04275548: spider_web
1838
+ n04277352: spindle
1839
+ n04285008: sports_car
1840
+ n04286575: spotlight
1841
+ n04296562: stage
1842
+ n04310018: steam_locomotive
1843
+ n04311004: steel_arch_bridge
1844
+ n04311174: steel_drum
1845
+ n04317175: stethoscope
1846
+ n04325704: stole
1847
+ n04326547: stone_wall
1848
+ n04328186: stopwatch
1849
+ n04330267: stove
1850
+ n04332243: strainer
1851
+ n04335435: streetcar
1852
+ n04336792: stretcher
1853
+ n04344873: studio_couch
1854
+ n04346328: stupa
1855
+ n04347754: submarine
1856
+ n04350905: suit
1857
+ n04355338: sundial
1858
+ n04355933: sunglass
1859
+ n04356056: sunglasses
1860
+ n04357314: sunscreen
1861
+ n04366367: suspension_bridge
1862
+ n04367480: swab
1863
+ n04370456: sweatshirt
1864
+ n04371430: swimming_trunks
1865
+ n04371774: swing
1866
+ n04372370: switch
1867
+ n04376876: syringe
1868
+ n04380533: table_lamp
1869
+ n04389033: tank
1870
+ n04392985: tape_player
1871
+ n04398044: teapot
1872
+ n04399382: teddy
1873
+ n04404412: television
1874
+ n04409515: tennis_ball
1875
+ n04417672: thatch
1876
+ n04418357: theater_curtain
1877
+ n04423845: thimble
1878
+ n04428191: thresher
1879
+ n04429376: throne
1880
+ n04435653: tile_roof
1881
+ n04442312: toaster
1882
+ n04443257: tobacco_shop
1883
+ n04447861: toilet_seat
1884
+ n04456115: torch
1885
+ n04458633: totem_pole
1886
+ n04461696: tow_truck
1887
+ n04462240: toyshop
1888
+ n04465501: tractor
1889
+ n04467665: trailer_truck
1890
+ n04476259: tray
1891
+ n04479046: trench_coat
1892
+ n04482393: tricycle
1893
+ n04483307: trimaran
1894
+ n04485082: tripod
1895
+ n04486054: triumphal_arch
1896
+ n04487081: trolleybus
1897
+ n04487394: trombone
1898
+ n04493381: tub
1899
+ n04501370: turnstile
1900
+ n04505470: typewriter_keyboard
1901
+ n04507155: umbrella
1902
+ n04509417: unicycle
1903
+ n04515003: upright
1904
+ n04517823: vacuum
1905
+ n04522168: vase
1906
+ n04523525: vault
1907
+ n04525038: velvet
1908
+ n04525305: vending_machine
1909
+ n04532106: vestment
1910
+ n04532670: viaduct
1911
+ n04536866: violin
1912
+ n04540053: volleyball
1913
+ n04542943: waffle_iron
1914
+ n04548280: wall_clock
1915
+ n04548362: wallet
1916
+ n04550184: wardrobe
1917
+ n04552348: warplane
1918
+ n04553703: washbasin
1919
+ n04554684: washer
1920
+ n04557648: water_bottle
1921
+ n04560804: water_jug
1922
+ n04562935: water_tower
1923
+ n04579145: whiskey_jug
1924
+ n04579432: whistle
1925
+ n04584207: wig
1926
+ n04589890: window_screen
1927
+ n04590129: window_shade
1928
+ n04591157: Windsor_tie
1929
+ n04591713: wine_bottle
1930
+ n04592741: wing
1931
+ n04596742: wok
1932
+ n04597913: wooden_spoon
1933
+ n04599235: wool
1934
+ n04604644: worm_fence
1935
+ n04606251: wreck
1936
+ n04612504: yawl
1937
+ n04613696: yurt
1938
+ n06359193: web_site
1939
+ n06596364: comic_book
1940
+ n06785654: crossword_puzzle
1941
+ n06794110: street_sign
1942
+ n06874185: traffic_light
1943
+ n07248320: book_jacket
1944
+ n07565083: menu
1945
+ n07579787: plate
1946
+ n07583066: guacamole
1947
+ n07584110: consomme
1948
+ n07590611: hot_pot
1949
+ n07613480: trifle
1950
+ n07614500: ice_cream
1951
+ n07615774: ice_lolly
1952
+ n07684084: French_loaf
1953
+ n07693725: bagel
1954
+ n07695742: pretzel
1955
+ n07697313: cheeseburger
1956
+ n07697537: hotdog
1957
+ n07711569: mashed_potato
1958
+ n07714571: head_cabbage
1959
+ n07714990: broccoli
1960
+ n07715103: cauliflower
1961
+ n07716358: zucchini
1962
+ n07716906: spaghetti_squash
1963
+ n07717410: acorn_squash
1964
+ n07717556: butternut_squash
1965
+ n07718472: cucumber
1966
+ n07718747: artichoke
1967
+ n07720875: bell_pepper
1968
+ n07730033: cardoon
1969
+ n07734744: mushroom
1970
+ n07742313: Granny_Smith
1971
+ n07745940: strawberry
1972
+ n07747607: orange
1973
+ n07749582: lemon
1974
+ n07753113: fig
1975
+ n07753275: pineapple
1976
+ n07753592: banana
1977
+ n07754684: jackfruit
1978
+ n07760859: custard_apple
1979
+ n07768694: pomegranate
1980
+ n07802026: hay
1981
+ n07831146: carbonara
1982
+ n07836838: chocolate_sauce
1983
+ n07860988: dough
1984
+ n07871810: meat_loaf
1985
+ n07873807: pizza
1986
+ n07875152: potpie
1987
+ n07880968: burrito
1988
+ n07892512: red_wine
1989
+ n07920052: espresso
1990
+ n07930864: cup
1991
+ n07932039: eggnog
1992
+ n09193705: alp
1993
+ n09229709: bubble
1994
+ n09246464: cliff
1995
+ n09256479: coral_reef
1996
+ n09288635: geyser
1997
+ n09332890: lakeside
1998
+ n09399592: promontory
1999
+ n09421951: sandbar
2000
+ n09428293: seashore
2001
+ n09468604: valley
2002
+ n09472597: volcano
2003
+ n09835506: ballplayer
2004
+ n10148035: groom
2005
+ n10565667: scuba_diver
2006
+ n11879895: rapeseed
2007
+ n11939491: daisy
2008
+ n12057211: yellow_lady's_slipper
2009
+ n12144580: corn
2010
+ n12267677: acorn
2011
+ n12620546: hip
2012
+ n12768682: buckeye
2013
+ n12985857: coral_fungus
2014
+ n12998815: agaric
2015
+ n13037406: gyromitra
2016
+ n13040303: stinkhorn
2017
+ n13044778: earthstar
2018
+ n13052670: hen-of-the-woods
2019
+ n13054560: bolete
2020
+ n13133613: ear
2021
+ n15075141: toilet_tissue
2022
+
2023
+
2024
+ # Download script/URL (optional)
2025
+ download: yolo/data/scripts/get_imagenet.sh
ultralytics/cfg/datasets/Objects365.yaml ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Objects365 dataset https://www.objects365.org/ by Megvii
3
+ # Example usage: yolo train data=Objects365.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── Objects365 ← downloads here (712 GB = 367G data + 345G zips)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/Objects365 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 1742289 images
13
+ val: images/val # val images (relative to 'path') 80000 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: Person
19
+ 1: Sneakers
20
+ 2: Chair
21
+ 3: Other Shoes
22
+ 4: Hat
23
+ 5: Car
24
+ 6: Lamp
25
+ 7: Glasses
26
+ 8: Bottle
27
+ 9: Desk
28
+ 10: Cup
29
+ 11: Street Lights
30
+ 12: Cabinet/shelf
31
+ 13: Handbag/Satchel
32
+ 14: Bracelet
33
+ 15: Plate
34
+ 16: Picture/Frame
35
+ 17: Helmet
36
+ 18: Book
37
+ 19: Gloves
38
+ 20: Storage box
39
+ 21: Boat
40
+ 22: Leather Shoes
41
+ 23: Flower
42
+ 24: Bench
43
+ 25: Potted Plant
44
+ 26: Bowl/Basin
45
+ 27: Flag
46
+ 28: Pillow
47
+ 29: Boots
48
+ 30: Vase
49
+ 31: Microphone
50
+ 32: Necklace
51
+ 33: Ring
52
+ 34: SUV
53
+ 35: Wine Glass
54
+ 36: Belt
55
+ 37: Monitor/TV
56
+ 38: Backpack
57
+ 39: Umbrella
58
+ 40: Traffic Light
59
+ 41: Speaker
60
+ 42: Watch
61
+ 43: Tie
62
+ 44: Trash bin Can
63
+ 45: Slippers
64
+ 46: Bicycle
65
+ 47: Stool
66
+ 48: Barrel/bucket
67
+ 49: Van
68
+ 50: Couch
69
+ 51: Sandals
70
+ 52: Basket
71
+ 53: Drum
72
+ 54: Pen/Pencil
73
+ 55: Bus
74
+ 56: Wild Bird
75
+ 57: High Heels
76
+ 58: Motorcycle
77
+ 59: Guitar
78
+ 60: Carpet
79
+ 61: Cell Phone
80
+ 62: Bread
81
+ 63: Camera
82
+ 64: Canned
83
+ 65: Truck
84
+ 66: Traffic cone
85
+ 67: Cymbal
86
+ 68: Lifesaver
87
+ 69: Towel
88
+ 70: Stuffed Toy
89
+ 71: Candle
90
+ 72: Sailboat
91
+ 73: Laptop
92
+ 74: Awning
93
+ 75: Bed
94
+ 76: Faucet
95
+ 77: Tent
96
+ 78: Horse
97
+ 79: Mirror
98
+ 80: Power outlet
99
+ 81: Sink
100
+ 82: Apple
101
+ 83: Air Conditioner
102
+ 84: Knife
103
+ 85: Hockey Stick
104
+ 86: Paddle
105
+ 87: Pickup Truck
106
+ 88: Fork
107
+ 89: Traffic Sign
108
+ 90: Balloon
109
+ 91: Tripod
110
+ 92: Dog
111
+ 93: Spoon
112
+ 94: Clock
113
+ 95: Pot
114
+ 96: Cow
115
+ 97: Cake
116
+ 98: Dinning Table
117
+ 99: Sheep
118
+ 100: Hanger
119
+ 101: Blackboard/Whiteboard
120
+ 102: Napkin
121
+ 103: Other Fish
122
+ 104: Orange/Tangerine
123
+ 105: Toiletry
124
+ 106: Keyboard
125
+ 107: Tomato
126
+ 108: Lantern
127
+ 109: Machinery Vehicle
128
+ 110: Fan
129
+ 111: Green Vegetables
130
+ 112: Banana
131
+ 113: Baseball Glove
132
+ 114: Airplane
133
+ 115: Mouse
134
+ 116: Train
135
+ 117: Pumpkin
136
+ 118: Soccer
137
+ 119: Skiboard
138
+ 120: Luggage
139
+ 121: Nightstand
140
+ 122: Tea pot
141
+ 123: Telephone
142
+ 124: Trolley
143
+ 125: Head Phone
144
+ 126: Sports Car
145
+ 127: Stop Sign
146
+ 128: Dessert
147
+ 129: Scooter
148
+ 130: Stroller
149
+ 131: Crane
150
+ 132: Remote
151
+ 133: Refrigerator
152
+ 134: Oven
153
+ 135: Lemon
154
+ 136: Duck
155
+ 137: Baseball Bat
156
+ 138: Surveillance Camera
157
+ 139: Cat
158
+ 140: Jug
159
+ 141: Broccoli
160
+ 142: Piano
161
+ 143: Pizza
162
+ 144: Elephant
163
+ 145: Skateboard
164
+ 146: Surfboard
165
+ 147: Gun
166
+ 148: Skating and Skiing shoes
167
+ 149: Gas stove
168
+ 150: Donut
169
+ 151: Bow Tie
170
+ 152: Carrot
171
+ 153: Toilet
172
+ 154: Kite
173
+ 155: Strawberry
174
+ 156: Other Balls
175
+ 157: Shovel
176
+ 158: Pepper
177
+ 159: Computer Box
178
+ 160: Toilet Paper
179
+ 161: Cleaning Products
180
+ 162: Chopsticks
181
+ 163: Microwave
182
+ 164: Pigeon
183
+ 165: Baseball
184
+ 166: Cutting/chopping Board
185
+ 167: Coffee Table
186
+ 168: Side Table
187
+ 169: Scissors
188
+ 170: Marker
189
+ 171: Pie
190
+ 172: Ladder
191
+ 173: Snowboard
192
+ 174: Cookies
193
+ 175: Radiator
194
+ 176: Fire Hydrant
195
+ 177: Basketball
196
+ 178: Zebra
197
+ 179: Grape
198
+ 180: Giraffe
199
+ 181: Potato
200
+ 182: Sausage
201
+ 183: Tricycle
202
+ 184: Violin
203
+ 185: Egg
204
+ 186: Fire Extinguisher
205
+ 187: Candy
206
+ 188: Fire Truck
207
+ 189: Billiards
208
+ 190: Converter
209
+ 191: Bathtub
210
+ 192: Wheelchair
211
+ 193: Golf Club
212
+ 194: Briefcase
213
+ 195: Cucumber
214
+ 196: Cigar/Cigarette
215
+ 197: Paint Brush
216
+ 198: Pear
217
+ 199: Heavy Truck
218
+ 200: Hamburger
219
+ 201: Extractor
220
+ 202: Extension Cord
221
+ 203: Tong
222
+ 204: Tennis Racket
223
+ 205: Folder
224
+ 206: American Football
225
+ 207: earphone
226
+ 208: Mask
227
+ 209: Kettle
228
+ 210: Tennis
229
+ 211: Ship
230
+ 212: Swing
231
+ 213: Coffee Machine
232
+ 214: Slide
233
+ 215: Carriage
234
+ 216: Onion
235
+ 217: Green beans
236
+ 218: Projector
237
+ 219: Frisbee
238
+ 220: Washing Machine/Drying Machine
239
+ 221: Chicken
240
+ 222: Printer
241
+ 223: Watermelon
242
+ 224: Saxophone
243
+ 225: Tissue
244
+ 226: Toothbrush
245
+ 227: Ice cream
246
+ 228: Hot-air balloon
247
+ 229: Cello
248
+ 230: French Fries
249
+ 231: Scale
250
+ 232: Trophy
251
+ 233: Cabbage
252
+ 234: Hot dog
253
+ 235: Blender
254
+ 236: Peach
255
+ 237: Rice
256
+ 238: Wallet/Purse
257
+ 239: Volleyball
258
+ 240: Deer
259
+ 241: Goose
260
+ 242: Tape
261
+ 243: Tablet
262
+ 244: Cosmetics
263
+ 245: Trumpet
264
+ 246: Pineapple
265
+ 247: Golf Ball
266
+ 248: Ambulance
267
+ 249: Parking meter
268
+ 250: Mango
269
+ 251: Key
270
+ 252: Hurdle
271
+ 253: Fishing Rod
272
+ 254: Medal
273
+ 255: Flute
274
+ 256: Brush
275
+ 257: Penguin
276
+ 258: Megaphone
277
+ 259: Corn
278
+ 260: Lettuce
279
+ 261: Garlic
280
+ 262: Swan
281
+ 263: Helicopter
282
+ 264: Green Onion
283
+ 265: Sandwich
284
+ 266: Nuts
285
+ 267: Speed Limit Sign
286
+ 268: Induction Cooker
287
+ 269: Broom
288
+ 270: Trombone
289
+ 271: Plum
290
+ 272: Rickshaw
291
+ 273: Goldfish
292
+ 274: Kiwi fruit
293
+ 275: Router/modem
294
+ 276: Poker Card
295
+ 277: Toaster
296
+ 278: Shrimp
297
+ 279: Sushi
298
+ 280: Cheese
299
+ 281: Notepaper
300
+ 282: Cherry
301
+ 283: Pliers
302
+ 284: CD
303
+ 285: Pasta
304
+ 286: Hammer
305
+ 287: Cue
306
+ 288: Avocado
307
+ 289: Hamimelon
308
+ 290: Flask
309
+ 291: Mushroom
310
+ 292: Screwdriver
311
+ 293: Soap
312
+ 294: Recorder
313
+ 295: Bear
314
+ 296: Eggplant
315
+ 297: Board Eraser
316
+ 298: Coconut
317
+ 299: Tape Measure/Ruler
318
+ 300: Pig
319
+ 301: Showerhead
320
+ 302: Globe
321
+ 303: Chips
322
+ 304: Steak
323
+ 305: Crosswalk Sign
324
+ 306: Stapler
325
+ 307: Camel
326
+ 308: Formula 1
327
+ 309: Pomegranate
328
+ 310: Dishwasher
329
+ 311: Crab
330
+ 312: Hoverboard
331
+ 313: Meat ball
332
+ 314: Rice Cooker
333
+ 315: Tuba
334
+ 316: Calculator
335
+ 317: Papaya
336
+ 318: Antelope
337
+ 319: Parrot
338
+ 320: Seal
339
+ 321: Butterfly
340
+ 322: Dumbbell
341
+ 323: Donkey
342
+ 324: Lion
343
+ 325: Urinal
344
+ 326: Dolphin
345
+ 327: Electric Drill
346
+ 328: Hair Dryer
347
+ 329: Egg tart
348
+ 330: Jellyfish
349
+ 331: Treadmill
350
+ 332: Lighter
351
+ 333: Grapefruit
352
+ 334: Game board
353
+ 335: Mop
354
+ 336: Radish
355
+ 337: Baozi
356
+ 338: Target
357
+ 339: French
358
+ 340: Spring Rolls
359
+ 341: Monkey
360
+ 342: Rabbit
361
+ 343: Pencil Case
362
+ 344: Yak
363
+ 345: Red Cabbage
364
+ 346: Binoculars
365
+ 347: Asparagus
366
+ 348: Barbell
367
+ 349: Scallop
368
+ 350: Noddles
369
+ 351: Comb
370
+ 352: Dumpling
371
+ 353: Oyster
372
+ 354: Table Tennis paddle
373
+ 355: Cosmetics Brush/Eyeliner Pencil
374
+ 356: Chainsaw
375
+ 357: Eraser
376
+ 358: Lobster
377
+ 359: Durian
378
+ 360: Okra
379
+ 361: Lipstick
380
+ 362: Cosmetics Mirror
381
+ 363: Curling
382
+ 364: Table Tennis
383
+
384
+
385
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
386
+ download: |
387
+ from tqdm import tqdm
388
+
389
+ from ultralytics.utils.checks import check_requirements
390
+ from ultralytics.utils.downloads import download
391
+ from ultralytics.utils.ops import xyxy2xywhn
392
+
393
+ import numpy as np
394
+ from pathlib import Path
395
+
396
+ check_requirements(('pycocotools>=2.0',))
397
+ from pycocotools.coco import COCO
398
+
399
+ # Make Directories
400
+ dir = Path(yaml['path']) # dataset root dir
401
+ for p in 'images', 'labels':
402
+ (dir / p).mkdir(parents=True, exist_ok=True)
403
+ for q in 'train', 'val':
404
+ (dir / p / q).mkdir(parents=True, exist_ok=True)
405
+
406
+ # Train, Val Splits
407
+ for split, patches in [('train', 50 + 1), ('val', 43 + 1)]:
408
+ print(f"Processing {split} in {patches} patches ...")
409
+ images, labels = dir / 'images' / split, dir / 'labels' / split
410
+
411
+ # Download
412
+ url = f"https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/{split}/"
413
+ if split == 'train':
414
+ download([f'{url}zhiyuan_objv2_{split}.tar.gz'], dir=dir) # annotations json
415
+ download([f'{url}patch{i}.tar.gz' for i in range(patches)], dir=images, curl=True, threads=8)
416
+ elif split == 'val':
417
+ download([f'{url}zhiyuan_objv2_{split}.json'], dir=dir) # annotations json
418
+ download([f'{url}images/v1/patch{i}.tar.gz' for i in range(15 + 1)], dir=images, curl=True, threads=8)
419
+ download([f'{url}images/v2/patch{i}.tar.gz' for i in range(16, patches)], dir=images, curl=True, threads=8)
420
+
421
+ # Move
422
+ for f in tqdm(images.rglob('*.jpg'), desc=f'Moving {split} images'):
423
+ f.rename(images / f.name) # move to /images/{split}
424
+
425
+ # Labels
426
+ coco = COCO(dir / f'zhiyuan_objv2_{split}.json')
427
+ names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
428
+ for cid, cat in enumerate(names):
429
+ catIds = coco.getCatIds(catNms=[cat])
430
+ imgIds = coco.getImgIds(catIds=catIds)
431
+ for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
432
+ width, height = im["width"], im["height"]
433
+ path = Path(im["file_name"]) # image filename
434
+ try:
435
+ with open(labels / path.with_suffix('.txt').name, 'a') as file:
436
+ annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
437
+ for a in coco.loadAnns(annIds):
438
+ x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
439
+ xyxy = np.array([x, y, x + w, y + h])[None] # pixels(1,4)
440
+ x, y, w, h = xyxy2xywhn(xyxy, w=width, h=height, clip=True)[0] # normalized and clipped
441
+ file.write(f"{cid} {x:.5f} {y:.5f} {w:.5f} {h:.5f}\n")
442
+ except Exception as e:
443
+ print(e)
ultralytics/cfg/datasets/SKU-110K.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
3
+ # Example usage: yolo train data=SKU-110K.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── SKU-110K ← downloads here (13.6 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/SKU-110K # dataset root dir
12
+ train: train.txt # train images (relative to 'path') 8219 images
13
+ val: val.txt # val images (relative to 'path') 588 images
14
+ test: test.txt # test images (optional) 2936 images
15
+
16
+ # Classes
17
+ names:
18
+ 0: object
19
+
20
+
21
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
22
+ download: |
23
"""SKU-110K download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

from ultralytics.utils.downloads import download
from ultralytics.utils.ops import xyxy2xywh

# Fetch the archive into the parent of the dataset root
dir = Path(yaml['path'])  # dataset root dir
parent = Path(dir.parent)  # download dir
download(['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz'], dir=parent)

# Rename the extracted folder to the expected dataset root and prepare a labels dir
if dir.exists():
    shutil.rmtree(dir)
(parent / 'SKU110K_fixed').rename(dir)  # rename dir
(dir / 'labels').mkdir(parents=True, exist_ok=True)  # create labels dir

# Convert the CSV annotations to per-image YOLO label files plus split image lists
columns = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
for csv_name in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
    rows = pd.read_csv(dir / 'annotations' / csv_name, names=columns).values  # annotations
    image_col, unique_images = rows[:, 0], np.unique(rows[:, 0])
    # e.g. annotations_train.csv -> <root>/train.txt listing every image once
    list_file = str((dir / csv_name).with_suffix('.txt')).replace('annotations_', '')
    with open(list_file, 'w') as f:
        f.writelines(f'./images/{s}\n' for s in unique_images)
    for im in tqdm(unique_images, desc=f'Converting {dir / csv_name}'):
        cls = 0  # single-class dataset
        with open((dir / 'labels' / im).with_suffix('.txt'), 'a') as f:
            for r in rows[image_col == im]:
                w, h = r[6], r[7]  # image width, height
                # normalize the pixel xyxy box, then convert to YOLO xywh
                xywh = xyxy2xywh(np.array([[r[1] / w, r[2] / h, r[3] / w, r[4] / h]]))[0]
                f.write(f"{cls} {xywh[0]:.5f} {xywh[1]:.5f} {xywh[2]:.5f} {xywh[3]:.5f}\n")  # write label
ultralytics/cfg/datasets/VOC.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford
3
+ # Example usage: yolo train data=VOC.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── VOC ← downloads here (2.8 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/VOC
12
+ train: # train images (relative to 'path') 16551 images
13
+ - images/train2012
14
+ - images/train2007
15
+ - images/val2012
16
+ - images/val2007
17
+ val: # val images (relative to 'path') 4952 images
18
+ - images/test2007
19
+ test: # test images (optional)
20
+ - images/test2007
21
+
22
+ # Classes
23
+ names:
24
+ 0: aeroplane
25
+ 1: bicycle
26
+ 2: bird
27
+ 3: boat
28
+ 4: bottle
29
+ 5: bus
30
+ 6: car
31
+ 7: cat
32
+ 8: chair
33
+ 9: cow
34
+ 10: diningtable
35
+ 11: dog
36
+ 12: horse
37
+ 13: motorbike
38
+ 14: person
39
+ 15: pottedplant
40
+ 16: sheep
41
+ 17: sofa
42
+ 18: train
43
+ 19: tvmonitor
44
+
45
+
46
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
47
+ download: |
48
"""PASCAL VOC download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import xml.etree.ElementTree as ET
from pathlib import Path

from tqdm import tqdm
from ultralytics.utils.downloads import download


def convert_label(path, lb_path, year, image_id):
    """Convert one VOC XML annotation file to a YOLO-format label file at `lb_path`."""

    def convert_box(size, box):
        # VOC gives (xmin, xmax, ymin, ymax) in pixels; return normalized xywh.
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    # Context managers ensure both files are closed even if parsing fails
    # (the original opened them and never closed either).
    with open(path / f'VOC{year}/Annotations/{image_id}.xml') as in_file, open(lb_path, 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        names = list(yaml['names'].values())  # names list
        for obj in root.iter('object'):
            cls = obj.find('name').text
            # Skip objects flagged 'difficult' and any class not in the dataset.
            if cls in names and int(obj.find('difficult').text) != 1:
                xmlbox = obj.find('bndbox')
                bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
                cls_id = names.index(cls)  # class id
                out_file.write(" ".join(str(a) for a in (cls_id, *bb)) + '\n')


# Download
dir = Path(yaml['path'])  # dataset root dir
url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
urls = [f'{url}VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
        f'{url}VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
        f'{url}VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
download(urls, dir=dir / 'images', curl=True, threads=3)

# Convert: move images into images/<set><year> and write YOLO labels alongside
path = dir / 'images/VOCdevkit'
for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
    imgs_path = dir / 'images' / f'{image_set}{year}'
    lbs_path = dir / 'labels' / f'{image_set}{year}'
    imgs_path.mkdir(exist_ok=True, parents=True)
    lbs_path.mkdir(exist_ok=True, parents=True)

    with open(path / f'VOC{year}/ImageSets/Main/{image_set}.txt') as f:
        image_ids = f.read().strip().split()
    # `image_id` replaces the original loop variable `id` (shadowed builtin);
    # `img` replaces the original reuse of `f` for both file handle and image path.
    for image_id in tqdm(image_ids, desc=f'{image_set}{year}'):
        img = path / f'VOC{year}/JPEGImages/{image_id}.jpg'  # old img path
        lb_path = (lbs_path / img.name).with_suffix('.txt')  # new label path
        img.rename(imgs_path / img.name)  # move image
        convert_label(path, lb_path, year, image_id)  # convert labels to YOLO format
ultralytics/cfg/datasets/VisDrone.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Tianjin University
3
+ # Example usage: yolo train data=VisDrone.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── VisDrone ← downloads here (2.3 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/VisDrone # dataset root dir
12
+ train: VisDrone2019-DET-train/images # train images (relative to 'path') 6471 images
13
+ val: VisDrone2019-DET-val/images # val images (relative to 'path') 548 images
14
+ test: VisDrone2019-DET-test-dev/images # test images (optional) 1610 images
15
+
16
+ # Classes
17
+ names:
18
+ 0: pedestrian
19
+ 1: people
20
+ 2: bicycle
21
+ 3: car
22
+ 4: van
23
+ 5: truck
24
+ 6: tricycle
25
+ 7: awning-tricycle
26
+ 8: bus
27
+ 9: motor
28
+
29
+
30
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
31
+ download: |
32
"""VisDrone download/convert script (Ultralytics `download:` hook; `yaml` is the injected dataset dict)."""
import os
from pathlib import Path

from ultralytics.utils.downloads import download


def visdrone2yolo(dir):
    """Convert every VisDrone annotation .txt under `dir/annotations` to a YOLO label file under `dir/labels`."""
    from PIL import Image
    from tqdm import tqdm

    def convert_box(size, box):
        # Convert VisDrone box (left, top, width, height in pixels) to normalized YOLO xywh
        dw = 1. / size[0]
        dh = 1. / size[1]
        return (box[0] + box[2] / 2) * dw, (box[1] + box[3] / 2) * dh, box[2] * dw, box[3] * dh

    (dir / 'labels').mkdir(parents=True, exist_ok=True)  # make labels directory
    for ann_file in tqdm((dir / 'annotations').glob('*.txt'), desc=f'Converting {dir}'):
        # the matching image shares the annotation file's stem
        img_size = Image.open((dir / 'images' / ann_file.name).with_suffix('.jpg')).size
        lines = []
        with open(ann_file, 'r') as src:  # read annotation.txt
            for row in (line.split(',') for line in src.read().strip().splitlines()):
                if row[4] == '0':  # VisDrone 'ignored regions' class 0
                    continue
                cls = int(row[5]) - 1
                box = convert_box(img_size, tuple(map(int, row[:4])))
                lines.append(f"{cls} {' '.join(f'{x:.6f}' for x in box)}\n")
        label_path = str(ann_file).replace(f'{os.sep}annotations{os.sep}', f'{os.sep}labels{os.sep}')
        with open(label_path, 'w') as dst:
            dst.writelines(lines)  # write label.txt


# Download
dir = Path(yaml['path'])  # dataset root dir
urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-train.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
        'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
download(urls, dir=dir, curl=True, threads=4)

# Convert
for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
    visdrone2yolo(dir / d)  # convert VisDrone annotations to YOLO labels
ultralytics/cfg/datasets/coco-pose.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO 2017 dataset http://cocodataset.org by Microsoft
3
+ # Example usage: yolo train data=coco-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco-pose ← downloads here (20.1 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco-pose # dataset root dir
12
+ train: train2017.txt # train images (relative to 'path') 118287 images
13
+ val: val2017.txt # val images (relative to 'path') 5000 images
14
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
15
+
16
+ # Keypoints
17
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
18
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
19
+
20
+ # Classes
21
+ names:
22
+ 0: person
23
+
24
+ # Download script/URL (optional)
25
+ download: |
26
# Download the COCO-pose labels and the COCO 2017 images
# (runs inside the Ultralytics `download:` hook; `yaml` is the injected dataset dict).
from ultralytics.utils.downloads import download
from pathlib import Path

dir = Path(yaml['path'])  # dataset root dir

# Labels (pose annotations pre-converted to YOLO format)
label_urls = ['https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-pose.zip']
download(label_urls, dir=dir.parent)

# Images (shared with the COCO detection dataset)
image_urls = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_urls, dir=dir / 'images', threads=3)
ultralytics/cfg/datasets/coco.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO 2017 dataset http://cocodataset.org by Microsoft
3
+ # Example usage: yolo train data=coco.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco ← downloads here (20.1 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco # dataset root dir
12
+ train: train2017.txt # train images (relative to 'path') 118287 images
13
+ val: val2017.txt # val images (relative to 'path') 5000 images
14
+ test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: |
102
# Download COCO 2017 labels and images
# (runs inside the Ultralytics `download:` hook; `yaml` is the injected dataset dict).
from ultralytics.utils.downloads import download
from pathlib import Path

segments = True  # segment or box labels
dir = Path(yaml['path'])  # dataset root dir

# Labels: segment-polygon or plain-box variant, chosen by the flag above
base = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
label_zip = 'coco2017labels-segments.zip' if segments else 'coco2017labels.zip'
download([base + label_zip], dir=dir.parent)

# Images
image_urls = [
    'http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
    'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
    'http://images.cocodataset.org/zips/test2017.zip',  # 7G, 41k images (optional)
]
download(image_urls, dir=dir / 'images', threads=3)
ultralytics/cfg/datasets/coco128-seg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco128-seg.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco128-seg ← downloads here (7 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco128-seg # dataset root dir
12
+ train: images/train2017 # train images (relative to 'path') 128 images
13
+ val: images/train2017 # val images (relative to 'path') 128 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco128-seg.zip
ultralytics/cfg/datasets/coco128.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco128.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco128 ← downloads here (7 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco128 # dataset root dir
12
+ train: images/train2017 # train images (relative to 'path') 128 images
13
+ val: images/train2017 # val images (relative to 'path') 128 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco128.zip
ultralytics/cfg/datasets/coco8-pose.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8-pose dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8-pose ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8-pose # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Keypoints
17
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
18
+ flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
19
+
20
+ # Classes
21
+ names:
22
+ 0: person
23
+
24
+ # Download script/URL (optional)
25
+ download: https://ultralytics.com/assets/coco8-pose.zip
ultralytics/cfg/datasets/coco8-seg.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8-seg dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8-seg.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8-seg ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8-seg # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco8-seg.zip
ultralytics/cfg/datasets/coco8.yaml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # COCO8 dataset (first 8 images from COCO train2017) by Ultralytics
3
+ # Example usage: yolo train data=coco8.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── coco8 ← downloads here (1 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/coco8 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 4 images
13
+ val: images/val # val images (relative to 'path') 4 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: person
19
+ 1: bicycle
20
+ 2: car
21
+ 3: motorcycle
22
+ 4: airplane
23
+ 5: bus
24
+ 6: train
25
+ 7: truck
26
+ 8: boat
27
+ 9: traffic light
28
+ 10: fire hydrant
29
+ 11: stop sign
30
+ 12: parking meter
31
+ 13: bench
32
+ 14: bird
33
+ 15: cat
34
+ 16: dog
35
+ 17: horse
36
+ 18: sheep
37
+ 19: cow
38
+ 20: elephant
39
+ 21: bear
40
+ 22: zebra
41
+ 23: giraffe
42
+ 24: backpack
43
+ 25: umbrella
44
+ 26: handbag
45
+ 27: tie
46
+ 28: suitcase
47
+ 29: frisbee
48
+ 30: skis
49
+ 31: snowboard
50
+ 32: sports ball
51
+ 33: kite
52
+ 34: baseball bat
53
+ 35: baseball glove
54
+ 36: skateboard
55
+ 37: surfboard
56
+ 38: tennis racket
57
+ 39: bottle
58
+ 40: wine glass
59
+ 41: cup
60
+ 42: fork
61
+ 43: knife
62
+ 44: spoon
63
+ 45: bowl
64
+ 46: banana
65
+ 47: apple
66
+ 48: sandwich
67
+ 49: orange
68
+ 50: broccoli
69
+ 51: carrot
70
+ 52: hot dog
71
+ 53: pizza
72
+ 54: donut
73
+ 55: cake
74
+ 56: chair
75
+ 57: couch
76
+ 58: potted plant
77
+ 59: bed
78
+ 60: dining table
79
+ 61: toilet
80
+ 62: tv
81
+ 63: laptop
82
+ 64: mouse
83
+ 65: remote
84
+ 66: keyboard
85
+ 67: cell phone
86
+ 68: microwave
87
+ 69: oven
88
+ 70: toaster
89
+ 71: sink
90
+ 72: refrigerator
91
+ 73: book
92
+ 74: clock
93
+ 75: vase
94
+ 76: scissors
95
+ 77: teddy bear
96
+ 78: hair drier
97
+ 79: toothbrush
98
+
99
+
100
+ # Download script/URL (optional)
101
+ download: https://ultralytics.com/assets/coco8.zip
ultralytics/cfg/datasets/open-images-v7.yaml ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Open Images v7 dataset https://storage.googleapis.com/openimages/web/index.html by Google
3
+ # Example usage: yolo train data=open-images-v7.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── open-images-v7 ← downloads here (561 GB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/open-images-v7 # dataset root dir
12
+ train: images/train # train images (relative to 'path') 1743042 images
13
+ val: images/val # val images (relative to 'path') 41620 images
14
+ test: # test images (optional)
15
+
16
+ # Classes
17
+ names:
18
+ 0: Accordion
19
+ 1: Adhesive tape
20
+ 2: Aircraft
21
+ 3: Airplane
22
+ 4: Alarm clock
23
+ 5: Alpaca
24
+ 6: Ambulance
25
+ 7: Animal
26
+ 8: Ant
27
+ 9: Antelope
28
+ 10: Apple
29
+ 11: Armadillo
30
+ 12: Artichoke
31
+ 13: Auto part
32
+ 14: Axe
33
+ 15: Backpack
34
+ 16: Bagel
35
+ 17: Baked goods
36
+ 18: Balance beam
37
+ 19: Ball
38
+ 20: Balloon
39
+ 21: Banana
40
+ 22: Band-aid
41
+ 23: Banjo
42
+ 24: Barge
43
+ 25: Barrel
44
+ 26: Baseball bat
45
+ 27: Baseball glove
46
+ 28: Bat (Animal)
47
+ 29: Bathroom accessory
48
+ 30: Bathroom cabinet
49
+ 31: Bathtub
50
+ 32: Beaker
51
+ 33: Bear
52
+ 34: Bed
53
+ 35: Bee
54
+ 36: Beehive
55
+ 37: Beer
56
+ 38: Beetle
57
+ 39: Bell pepper
58
+ 40: Belt
59
+ 41: Bench
60
+ 42: Bicycle
61
+ 43: Bicycle helmet
62
+ 44: Bicycle wheel
63
+ 45: Bidet
64
+ 46: Billboard
65
+ 47: Billiard table
66
+ 48: Binoculars
67
+ 49: Bird
68
+ 50: Blender
69
+ 51: Blue jay
70
+ 52: Boat
71
+ 53: Bomb
72
+ 54: Book
73
+ 55: Bookcase
74
+ 56: Boot
75
+ 57: Bottle
76
+ 58: Bottle opener
77
+ 59: Bow and arrow
78
+ 60: Bowl
79
+ 61: Bowling equipment
80
+ 62: Box
81
+ 63: Boy
82
+ 64: Brassiere
83
+ 65: Bread
84
+ 66: Briefcase
85
+ 67: Broccoli
86
+ 68: Bronze sculpture
87
+ 69: Brown bear
88
+ 70: Building
89
+ 71: Bull
90
+ 72: Burrito
91
+ 73: Bus
92
+ 74: Bust
93
+ 75: Butterfly
94
+ 76: Cabbage
95
+ 77: Cabinetry
96
+ 78: Cake
97
+ 79: Cake stand
98
+ 80: Calculator
99
+ 81: Camel
100
+ 82: Camera
101
+ 83: Can opener
102
+ 84: Canary
103
+ 85: Candle
104
+ 86: Candy
105
+ 87: Cannon
106
+ 88: Canoe
107
+ 89: Cantaloupe
108
+ 90: Car
109
+ 91: Carnivore
110
+ 92: Carrot
111
+ 93: Cart
112
+ 94: Cassette deck
113
+ 95: Castle
114
+ 96: Cat
115
+ 97: Cat furniture
116
+ 98: Caterpillar
117
+ 99: Cattle
118
+ 100: Ceiling fan
119
+ 101: Cello
120
+ 102: Centipede
121
+ 103: Chainsaw
122
+ 104: Chair
123
+ 105: Cheese
124
+ 106: Cheetah
125
+ 107: Chest of drawers
126
+ 108: Chicken
127
+ 109: Chime
128
+ 110: Chisel
129
+ 111: Chopsticks
130
+ 112: Christmas tree
131
+ 113: Clock
132
+ 114: Closet
133
+ 115: Clothing
134
+ 116: Coat
135
+ 117: Cocktail
136
+ 118: Cocktail shaker
137
+ 119: Coconut
138
+ 120: Coffee
139
+ 121: Coffee cup
140
+ 122: Coffee table
141
+ 123: Coffeemaker
142
+ 124: Coin
143
+ 125: Common fig
144
+ 126: Common sunflower
145
+ 127: Computer keyboard
146
+ 128: Computer monitor
147
+ 129: Computer mouse
148
+ 130: Container
149
+ 131: Convenience store
150
+ 132: Cookie
151
+ 133: Cooking spray
152
+ 134: Corded phone
153
+ 135: Cosmetics
154
+ 136: Couch
155
+ 137: Countertop
156
+ 138: Cowboy hat
157
+ 139: Crab
158
+ 140: Cream
159
+ 141: Cricket ball
160
+ 142: Crocodile
161
+ 143: Croissant
162
+ 144: Crown
163
+ 145: Crutch
164
+ 146: Cucumber
165
+ 147: Cupboard
166
+ 148: Curtain
167
+ 149: Cutting board
168
+ 150: Dagger
169
+ 151: Dairy Product
170
+ 152: Deer
171
+ 153: Desk
172
+ 154: Dessert
173
+ 155: Diaper
174
+ 156: Dice
175
+ 157: Digital clock
176
+ 158: Dinosaur
177
+ 159: Dishwasher
178
+ 160: Dog
179
+ 161: Dog bed
180
+ 162: Doll
181
+ 163: Dolphin
182
+ 164: Door
183
+ 165: Door handle
184
+ 166: Doughnut
185
+ 167: Dragonfly
186
+ 168: Drawer
187
+ 169: Dress
188
+ 170: Drill (Tool)
189
+ 171: Drink
190
+ 172: Drinking straw
191
+ 173: Drum
192
+ 174: Duck
193
+ 175: Dumbbell
194
+ 176: Eagle
195
+ 177: Earrings
196
+ 178: Egg (Food)
197
+ 179: Elephant
198
+ 180: Envelope
199
+ 181: Eraser
200
+ 182: Face powder
201
+ 183: Facial tissue holder
202
+ 184: Falcon
203
+ 185: Fashion accessory
204
+ 186: Fast food
205
+ 187: Fax
206
+ 188: Fedora
207
+ 189: Filing cabinet
208
+ 190: Fire hydrant
209
+ 191: Fireplace
210
+ 192: Fish
211
+ 193: Flag
212
+ 194: Flashlight
213
+ 195: Flower
214
+ 196: Flowerpot
215
+ 197: Flute
216
+ 198: Flying disc
217
+ 199: Food
218
+ 200: Food processor
219
+ 201: Football
220
+ 202: Football helmet
221
+ 203: Footwear
222
+ 204: Fork
223
+ 205: Fountain
224
+ 206: Fox
225
+ 207: French fries
226
+ 208: French horn
227
+ 209: Frog
228
+ 210: Fruit
229
+ 211: Frying pan
230
+ 212: Furniture
231
+ 213: Garden Asparagus
232
+ 214: Gas stove
233
+ 215: Giraffe
234
+ 216: Girl
235
+ 217: Glasses
236
+ 218: Glove
237
+ 219: Goat
238
+ 220: Goggles
239
+ 221: Goldfish
240
+ 222: Golf ball
241
+ 223: Golf cart
242
+ 224: Gondola
243
+ 225: Goose
244
+ 226: Grape
245
+ 227: Grapefruit
246
+ 228: Grinder
247
+ 229: Guacamole
248
+ 230: Guitar
249
+ 231: Hair dryer
250
+ 232: Hair spray
251
+ 233: Hamburger
252
+ 234: Hammer
253
+ 235: Hamster
254
+ 236: Hand dryer
255
+ 237: Handbag
256
+ 238: Handgun
257
+ 239: Harbor seal
258
+ 240: Harmonica
259
+ 241: Harp
260
+ 242: Harpsichord
261
+ 243: Hat
262
+ 244: Headphones
263
+ 245: Heater
264
+ 246: Hedgehog
265
+ 247: Helicopter
266
+ 248: Helmet
267
+ 249: High heels
268
+ 250: Hiking equipment
269
+ 251: Hippopotamus
270
+ 252: Home appliance
271
+ 253: Honeycomb
272
+ 254: Horizontal bar
273
+ 255: Horse
274
+ 256: Hot dog
275
+ 257: House
276
+ 258: Houseplant
277
+ 259: Human arm
278
+ 260: Human beard
279
+ 261: Human body
280
+ 262: Human ear
281
+ 263: Human eye
282
+ 264: Human face
283
+ 265: Human foot
284
+ 266: Human hair
285
+ 267: Human hand
286
+ 268: Human head
287
+ 269: Human leg
288
+ 270: Human mouth
289
+ 271: Human nose
290
+ 272: Humidifier
291
+ 273: Ice cream
292
+ 274: Indoor rower
293
+ 275: Infant bed
294
+ 276: Insect
295
+ 277: Invertebrate
296
+ 278: Ipod
297
+ 279: Isopod
298
+ 280: Jacket
299
+ 281: Jacuzzi
300
+ 282: Jaguar (Animal)
301
+ 283: Jeans
302
+ 284: Jellyfish
303
+ 285: Jet ski
304
+ 286: Jug
305
+ 287: Juice
306
+ 288: Kangaroo
307
+ 289: Kettle
308
+ 290: Kitchen & dining room table
309
+ 291: Kitchen appliance
310
+ 292: Kitchen knife
311
+ 293: Kitchen utensil
312
+ 294: Kitchenware
313
+ 295: Kite
314
+ 296: Knife
315
+ 297: Koala
316
+ 298: Ladder
317
+ 299: Ladle
318
+ 300: Ladybug
319
+ 301: Lamp
320
+ 302: Land vehicle
321
+ 303: Lantern
322
+ 304: Laptop
323
+ 305: Lavender (Plant)
324
+ 306: Lemon
325
+ 307: Leopard
326
+ 308: Light bulb
327
+ 309: Light switch
328
+ 310: Lighthouse
329
+ 311: Lily
330
+ 312: Limousine
331
+ 313: Lion
332
+ 314: Lipstick
333
+ 315: Lizard
334
+ 316: Lobster
335
+ 317: Loveseat
336
+ 318: Luggage and bags
337
+ 319: Lynx
338
+ 320: Magpie
339
+ 321: Mammal
340
+ 322: Man
341
+ 323: Mango
342
+ 324: Maple
343
+ 325: Maracas
344
+ 326: Marine invertebrates
345
+ 327: Marine mammal
346
+ 328: Measuring cup
347
+ 329: Mechanical fan
348
+ 330: Medical equipment
349
+ 331: Microphone
350
+ 332: Microwave oven
351
+ 333: Milk
352
+ 334: Miniskirt
353
+ 335: Mirror
354
+ 336: Missile
355
+ 337: Mixer
356
+ 338: Mixing bowl
357
+ 339: Mobile phone
358
+ 340: Monkey
359
+ 341: Moths and butterflies
360
+ 342: Motorcycle
361
+ 343: Mouse
362
+ 344: Muffin
363
+ 345: Mug
364
+ 346: Mule
365
+ 347: Mushroom
366
+ 348: Musical instrument
367
+ 349: Musical keyboard
368
+ 350: Nail (Construction)
369
+ 351: Necklace
370
+ 352: Nightstand
371
+ 353: Oboe
372
+ 354: Office building
373
+ 355: Office supplies
374
+ 356: Orange
375
+ 357: Organ (Musical Instrument)
376
+ 358: Ostrich
377
+ 359: Otter
378
+ 360: Oven
379
+ 361: Owl
380
+ 362: Oyster
381
+ 363: Paddle
382
+ 364: Palm tree
383
+ 365: Pancake
384
+ 366: Panda
385
+ 367: Paper cutter
386
+ 368: Paper towel
387
+ 369: Parachute
388
+ 370: Parking meter
389
+ 371: Parrot
390
+ 372: Pasta
391
+ 373: Pastry
392
+ 374: Peach
393
+ 375: Pear
394
+ 376: Pen
395
+ 377: Pencil case
396
+ 378: Pencil sharpener
397
+ 379: Penguin
398
+ 380: Perfume
399
+ 381: Person
400
+ 382: Personal care
401
+ 383: Personal flotation device
402
+ 384: Piano
403
+ 385: Picnic basket
404
+ 386: Picture frame
405
+ 387: Pig
406
+ 388: Pillow
407
+ 389: Pineapple
408
+ 390: Pitcher (Container)
409
+ 391: Pizza
410
+ 392: Pizza cutter
411
+ 393: Plant
412
+ 394: Plastic bag
413
+ 395: Plate
414
+ 396: Platter
415
+ 397: Plumbing fixture
416
+ 398: Polar bear
417
+ 399: Pomegranate
418
+ 400: Popcorn
419
+ 401: Porch
420
+ 402: Porcupine
421
+ 403: Poster
422
+ 404: Potato
423
+ 405: Power plugs and sockets
424
+ 406: Pressure cooker
425
+ 407: Pretzel
426
+ 408: Printer
427
+ 409: Pumpkin
428
+ 410: Punching bag
429
+ 411: Rabbit
430
+ 412: Raccoon
431
+ 413: Racket
432
+ 414: Radish
433
+ 415: Ratchet (Device)
434
+ 416: Raven
435
+ 417: Rays and skates
436
+ 418: Red panda
437
+ 419: Refrigerator
438
+ 420: Remote control
439
+ 421: Reptile
440
+ 422: Rhinoceros
441
+ 423: Rifle
442
+ 424: Ring binder
443
+ 425: Rocket
444
+ 426: Roller skates
445
+ 427: Rose
446
+ 428: Rugby ball
447
+ 429: Ruler
448
+ 430: Salad
449
+ 431: Salt and pepper shakers
450
+ 432: Sandal
451
+ 433: Sandwich
452
+ 434: Saucer
453
+ 435: Saxophone
454
+ 436: Scale
455
+ 437: Scarf
456
+ 438: Scissors
457
+ 439: Scoreboard
458
+ 440: Scorpion
459
+ 441: Screwdriver
460
+ 442: Sculpture
461
+ 443: Sea lion
462
+ 444: Sea turtle
463
+ 445: Seafood
464
+ 446: Seahorse
465
+ 447: Seat belt
466
+ 448: Segway
467
+ 449: Serving tray
468
+ 450: Sewing machine
469
+ 451: Shark
470
+ 452: Sheep
471
+ 453: Shelf
472
+ 454: Shellfish
473
+ 455: Shirt
474
+ 456: Shorts
475
+ 457: Shotgun
476
+ 458: Shower
477
+ 459: Shrimp
478
+ 460: Sink
479
+ 461: Skateboard
480
+ 462: Ski
481
+ 463: Skirt
482
+ 464: Skull
483
+ 465: Skunk
484
+ 466: Skyscraper
485
+ 467: Slow cooker
486
+ 468: Snack
487
+ 469: Snail
488
+ 470: Snake
489
+ 471: Snowboard
490
+ 472: Snowman
491
+ 473: Snowmobile
492
+ 474: Snowplow
493
+ 475: Soap dispenser
494
+ 476: Sock
495
+ 477: Sofa bed
496
+ 478: Sombrero
497
+ 479: Sparrow
498
+ 480: Spatula
499
+ 481: Spice rack
500
+ 482: Spider
501
+ 483: Spoon
502
+ 484: Sports equipment
503
+ 485: Sports uniform
504
+ 486: Squash (Plant)
505
+ 487: Squid
506
+ 488: Squirrel
507
+ 489: Stairs
508
+ 490: Stapler
509
+ 491: Starfish
510
+ 492: Stationary bicycle
511
+ 493: Stethoscope
512
+ 494: Stool
513
+ 495: Stop sign
514
+ 496: Strawberry
515
+ 497: Street light
516
+ 498: Stretcher
517
+ 499: Studio couch
518
+ 500: Submarine
519
+ 501: Submarine sandwich
520
+ 502: Suit
521
+ 503: Suitcase
522
+ 504: Sun hat
523
+ 505: Sunglasses
524
+ 506: Surfboard
525
+ 507: Sushi
526
+ 508: Swan
527
+ 509: Swim cap
528
+ 510: Swimming pool
529
+ 511: Swimwear
530
+ 512: Sword
531
+ 513: Syringe
532
+ 514: Table
533
+ 515: Table tennis racket
534
+ 516: Tablet computer
535
+ 517: Tableware
536
+ 518: Taco
537
+ 519: Tank
538
+ 520: Tap
539
+ 521: Tart
540
+ 522: Taxi
541
+ 523: Tea
542
+ 524: Teapot
543
+ 525: Teddy bear
544
+ 526: Telephone
545
+ 527: Television
546
+ 528: Tennis ball
547
+ 529: Tennis racket
548
+ 530: Tent
549
+ 531: Tiara
550
+ 532: Tick
551
+ 533: Tie
552
+ 534: Tiger
553
+ 535: Tin can
554
+ 536: Tire
555
+ 537: Toaster
556
+ 538: Toilet
557
+ 539: Toilet paper
558
+ 540: Tomato
559
+ 541: Tool
560
+ 542: Toothbrush
561
+ 543: Torch
562
+ 544: Tortoise
563
+ 545: Towel
564
+ 546: Tower
565
+ 547: Toy
566
+ 548: Traffic light
567
+ 549: Traffic sign
568
+ 550: Train
569
+ 551: Training bench
570
+ 552: Treadmill
571
+ 553: Tree
572
+ 554: Tree house
573
+ 555: Tripod
574
+ 556: Trombone
575
+ 557: Trousers
576
+ 558: Truck
577
+ 559: Trumpet
578
+ 560: Turkey
579
+ 561: Turtle
580
+ 562: Umbrella
581
+ 563: Unicycle
582
+ 564: Van
583
+ 565: Vase
584
+ 566: Vegetable
585
+ 567: Vehicle
586
+ 568: Vehicle registration plate
587
+ 569: Violin
588
+ 570: Volleyball (Ball)
589
+ 571: Waffle
590
+ 572: Waffle iron
591
+ 573: Wall clock
592
+ 574: Wardrobe
593
+ 575: Washing machine
594
+ 576: Waste container
595
+ 577: Watch
596
+ 578: Watercraft
597
+ 579: Watermelon
598
+ 580: Weapon
599
+ 581: Whale
600
+ 582: Wheel
601
+ 583: Wheelchair
602
+ 584: Whisk
603
+ 585: Whiteboard
604
+ 586: Willow
605
+ 587: Window
606
+ 588: Window blind
607
+ 589: Wine
608
+ 590: Wine glass
609
+ 591: Wine rack
610
+ 592: Winter melon
611
+ 593: Wok
612
+ 594: Woman
613
+ 595: Wood-burning stove
614
+ 596: Woodpecker
615
+ 597: Worm
616
+ 598: Wrench
617
+ 599: Zebra
618
+ 600: Zucchini
619
+
620
+
621
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
622
+ download: |
623
+ from ultralytics.utils import LOGGER, SETTINGS, Path, is_ubuntu, get_ubuntu_version
624
+ from ultralytics.utils.checks import check_requirements, check_version
625
+
626
+ check_requirements('fiftyone')
627
+ if is_ubuntu() and check_version(get_ubuntu_version(), '>=22.04'):
628
+ # Ubuntu>=22.04 patch https://github.com/voxel51/fiftyone/issues/2961#issuecomment-1666519347
629
+ check_requirements('fiftyone-db-ubuntu2204')
630
+
631
+ import fiftyone as fo
632
+ import fiftyone.zoo as foz
633
+ import warnings
634
+
635
+ name = 'open-images-v7'
636
+ fraction = 1.0 # fraction of full dataset to use
637
 + LOGGER.warning('WARNING ⚠️ Open Images V7 dataset requires at least 561 GB of free space. Starting download...')
638
+ for split in 'train', 'validation': # 1743042 train, 41620 val images
639
+ train = split == 'train'
640
+
641
+ # Load Open Images dataset
642
+ dataset = foz.load_zoo_dataset(name,
643
+ split=split,
644
+ label_types=['detections'],
645
+ dataset_dir=Path(SETTINGS['datasets_dir']) / 'fiftyone' / name,
646
+ max_samples=round((1743042 if train else 41620) * fraction))
647
+
648
+ # Define classes
649
+ if train:
650
+ classes = dataset.default_classes # all classes
651
+ # classes = dataset.distinct('ground_truth.detections.label') # only observed classes
652
+
653
+ # Export to YOLO format
654
+ with warnings.catch_warnings():
655
+ warnings.filterwarnings("ignore", category=UserWarning, module="fiftyone.utils.yolo")
656
+ dataset.export(export_dir=str(Path(SETTINGS['datasets_dir']) / name),
657
+ dataset_type=fo.types.YOLOv5Dataset,
658
+ label_field='ground_truth',
659
+ split='val' if split == 'validation' else split,
660
+ classes=classes,
661
+ overwrite=train)
ultralytics/cfg/datasets/tiger-pose.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Tiger Pose dataset by Ultralytics
3
+ # Example usage: yolo train data=tiger-pose.yaml
4
+ # parent
5
+ # ├── ultralytics
6
+ # └── datasets
7
+ # └── tiger-pose ← downloads here (75.3 MB)
8
+
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ../datasets/tiger-pose # dataset root dir
12
+ train: train # train images (relative to 'path') 210 images
13
+ val: val # val images (relative to 'path') 53 images
14
+
15
+ # Keypoints
16
+ kpt_shape: [12, 2] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
17
+ flip_idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
18
+
19
+ # Classes
20
+ names:
21
+ 0: tiger
22
+
23
+ # Download script/URL (optional)
24
+ download: https://ultralytics.com/assets/tiger-pose.zip
ultralytics/cfg/datasets/xView.yaml ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. National Geospatial-Intelligence Agency (NGA)
3
+ # -------- DOWNLOAD DATA MANUALLY and jar xf val_images.zip to 'datasets/xView' before running train command! --------
4
+ # Example usage: yolo train data=xView.yaml
5
+ # parent
6
+ # ├── ultralytics
7
+ # └── datasets
8
+ # └── xView ← downloads here (20.7 GB)
9
+
10
+
11
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
12
+ path: ../datasets/xView # dataset root dir
13
+ train: images/autosplit_train.txt # train images (relative to 'path') 90% of 847 train images
14
 + val: images/autosplit_val.txt # val images (relative to 'path') 10% of 847 train images
15
+
16
+ # Classes
17
+ names:
18
+ 0: Fixed-wing Aircraft
19
+ 1: Small Aircraft
20
+ 2: Cargo Plane
21
+ 3: Helicopter
22
+ 4: Passenger Vehicle
23
+ 5: Small Car
24
+ 6: Bus
25
+ 7: Pickup Truck
26
+ 8: Utility Truck
27
+ 9: Truck
28
+ 10: Cargo Truck
29
+ 11: Truck w/Box
30
+ 12: Truck Tractor
31
+ 13: Trailer
32
+ 14: Truck w/Flatbed
33
+ 15: Truck w/Liquid
34
+ 16: Crane Truck
35
+ 17: Railway Vehicle
36
+ 18: Passenger Car
37
+ 19: Cargo Car
38
+ 20: Flat Car
39
+ 21: Tank car
40
+ 22: Locomotive
41
+ 23: Maritime Vessel
42
+ 24: Motorboat
43
+ 25: Sailboat
44
+ 26: Tugboat
45
+ 27: Barge
46
+ 28: Fishing Vessel
47
+ 29: Ferry
48
+ 30: Yacht
49
+ 31: Container Ship
50
+ 32: Oil Tanker
51
+ 33: Engineering Vehicle
52
+ 34: Tower crane
53
+ 35: Container Crane
54
+ 36: Reach Stacker
55
+ 37: Straddle Carrier
56
+ 38: Mobile Crane
57
+ 39: Dump Truck
58
+ 40: Haul Truck
59
+ 41: Scraper/Tractor
60
+ 42: Front loader/Bulldozer
61
+ 43: Excavator
62
+ 44: Cement Mixer
63
+ 45: Ground Grader
64
+ 46: Hut/Tent
65
+ 47: Shed
66
+ 48: Building
67
+ 49: Aircraft Hangar
68
+ 50: Damaged Building
69
+ 51: Facility
70
+ 52: Construction Site
71
+ 53: Vehicle Lot
72
+ 54: Helipad
73
+ 55: Storage Tank
74
+ 56: Shipping container lot
75
+ 57: Shipping Container
76
+ 58: Pylon
77
+ 59: Tower
78
+
79
+
80
+ # Download script/URL (optional) ---------------------------------------------------------------------------------------
81
+ download: |
82
+ import json
83
+ import os
84
+ from pathlib import Path
85
+
86
+ import numpy as np
87
+ from PIL import Image
88
+ from tqdm import tqdm
89
+
90
+ from ultralytics.data.utils import autosplit
91
+ from ultralytics.utils.ops import xyxy2xywhn
92
+
93
+
94
+ def convert_labels(fname=Path('xView/xView_train.geojson')):
95
+ # Convert xView geoJSON labels to YOLO format
96
+ path = fname.parent
97
+ with open(fname) as f:
98
+ print(f'Loading {fname}...')
99
+ data = json.load(f)
100
+
101
+ # Make dirs
102
+ labels = Path(path / 'labels' / 'train')
103
+ os.system(f'rm -rf {labels}')
104
+ labels.mkdir(parents=True, exist_ok=True)
105
+
106
+ # xView classes 11-94 to 0-59
107
+ xview_class2index = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, -1, 9, 10, 11,
108
+ 12, 13, 14, 15, -1, -1, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26, 27, -1, 28, -1,
109
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, -1, -1, -1, -1, 46,
110
+ 47, 48, 49, -1, 50, 51, -1, 52, -1, -1, -1, 53, 54, -1, 55, -1, -1, 56, -1, 57, -1, 58, 59]
111
+
112
+ shapes = {}
113
+ for feature in tqdm(data['features'], desc=f'Converting {fname}'):
114
+ p = feature['properties']
115
+ if p['bounds_imcoords']:
116
+ id = p['image_id']
117
+ file = path / 'train_images' / id
118
+ if file.exists(): # 1395.tif missing
119
+ try:
120
+ box = np.array([int(num) for num in p['bounds_imcoords'].split(",")])
121
+ assert box.shape[0] == 4, f'incorrect box shape {box.shape[0]}'
122
+ cls = p['type_id']
123
 + cls = xview_class2index[int(cls)] # xView class to 0-59
124
+ assert 59 >= cls >= 0, f'incorrect class index {cls}'
125
+
126
+ # Write YOLO label
127
+ if id not in shapes:
128
+ shapes[id] = Image.open(file).size
129
+ box = xyxy2xywhn(box[None].astype(np.float), w=shapes[id][0], h=shapes[id][1], clip=True)
130
+ with open((labels / id).with_suffix('.txt'), 'a') as f:
131
+ f.write(f"{cls} {' '.join(f'{x:.6f}' for x in box[0])}\n") # write label.txt
132
+ except Exception as e:
133
+ print(f'WARNING: skipping one label for {file}: {e}')
134
+
135
+
136
+ # Download manually from https://challenge.xviewdataset.org
137
+ dir = Path(yaml['path']) # dataset root dir
138
+ # urls = ['https://d307kc0mrhucc3.cloudfront.net/train_labels.zip', # train labels
139
+ # 'https://d307kc0mrhucc3.cloudfront.net/train_images.zip', # 15G, 847 train images
140
+ # 'https://d307kc0mrhucc3.cloudfront.net/val_images.zip'] # 5G, 282 val images (no labels)
141
+ # download(urls, dir=dir)
142
+
143
+ # Convert labels
144
+ convert_labels(dir / 'xView_train.geojson')
145
+
146
+ # Move images
147
+ images = Path(dir / 'images')
148
+ images.mkdir(parents=True, exist_ok=True)
149
+ Path(dir / 'train_images').rename(dir / 'images' / 'train')
150
+ Path(dir / 'val_images').rename(dir / 'images' / 'val')
151
+
152
+ # Split
153
+ autosplit(dir / 'images' / 'train')
ultralytics/cfg/default.yaml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default training settings and hyperparameters for medium-augmentation COCO training
3
+
4
+ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
5
+ mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
6
+
7
+ # Train settings -------------------------------------------------------------------------------------------------------
8
+ model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
9
+ data: # (str, optional) path to data file, i.e. coco128.yaml
10
+ epochs: 100 # (int) number of epochs to train for
11
+ patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
12
+ batch: 16 # (int) number of images per batch (-1 for AutoBatch)
13
+ imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
14
+ save: True # (bool) save train checkpoints and predict results
15
+ save_period: -1 # (int) Save checkpoint every x epochs (disabled if < 1)
16
+ cache: False # (bool) True/ram, disk or False. Use cache for data loading
17
+ device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
18
+ workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
19
+ project: # (str, optional) project name
20
+ name: # (str, optional) experiment name, results saved to 'project/name' directory
21
+ exist_ok: False # (bool) whether to overwrite existing experiment
22
+ pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
23
+ optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
24
+ verbose: True # (bool) whether to print verbose output
25
+ seed: 0 # (int) random seed for reproducibility
26
+ deterministic: True # (bool) whether to enable deterministic mode
27
+ single_cls: False # (bool) train multi-class data as single-class
28
+ rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
29
+ cos_lr: False # (bool) use cosine learning rate scheduler
30
+ close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
31
+ resume: False # (bool) resume training from last checkpoint
32
+ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
33
+ fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
34
+ profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
35
+ freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
36
+ # Segmentation
37
+ overlap_mask: True # (bool) masks should overlap during training (segment train only)
38
+ mask_ratio: 4 # (int) mask downsample ratio (segment train only)
39
+ # Classification
40
+ dropout: 0.0 # (float) use dropout regularization (classify train only)
41
+
42
+ # Val/Test settings ----------------------------------------------------------------------------------------------------
43
+ val: True # (bool) validate/test during training
44
+ split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
45
+ save_json: False # (bool) save results to JSON file
46
+ save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
47
+ conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
48
+ iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
49
+ max_det: 300 # (int) maximum number of detections per image
50
+ half: False # (bool) use half precision (FP16)
51
+ dnn: False # (bool) use OpenCV DNN for ONNX inference
52
+ plots: True # (bool) save plots and images during train/val
53
+
54
+ # Predict settings -----------------------------------------------------------------------------------------------------
55
+ source: # (str, optional) source directory for images or videos
56
+ vid_stride: 1 # (int) video frame-rate stride
57
+ stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
58
+ visualize: False # (bool) visualize model features
59
+ augment: False # (bool) apply image augmentation to prediction sources
60
+ agnostic_nms: False # (bool) class-agnostic NMS
61
+ classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
62
+ retina_masks: False # (bool) use high-resolution segmentation masks
63
+
64
+ # Visualize settings ---------------------------------------------------------------------------------------------------
65
+ show: False # (bool) show predicted images and videos if environment allows
66
+ save_frames: False # (bool) save predicted individual video frames
67
+ save_txt: False # (bool) save results as .txt file
68
+ save_conf: False # (bool) save results with confidence scores
69
+ save_crop: False # (bool) save cropped images with results
70
+ show_labels: True # (bool) show prediction labels, i.e. 'person'
71
+ show_conf: True # (bool) show prediction confidence, i.e. '0.99'
72
+ show_boxes: True # (bool) show prediction boxes
73
+ line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
74
+
75
+ # Export settings ------------------------------------------------------------------------------------------------------
76
+ format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
77
 + keras: False # (bool) use Keras
78
+ optimize: False # (bool) TorchScript: optimize for mobile
79
+ int8: False # (bool) CoreML/TF INT8 quantization
80
+ dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
81
+ simplify: False # (bool) ONNX: simplify model
82
+ opset: # (int, optional) ONNX: opset version
83
+ workspace: 4 # (int) TensorRT: workspace size (GB)
84
+ nms: False # (bool) CoreML: add NMS
85
+
86
+ # Hyperparameters ------------------------------------------------------------------------------------------------------
87
+ lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
88
+ lrf: 0.01 # (float) final learning rate (lr0 * lrf)
89
+ momentum: 0.937 # (float) SGD momentum/Adam beta1
90
+ weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
91
+ warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
92
+ warmup_momentum: 0.8 # (float) warmup initial momentum
93
+ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
94
+ box: 7.5 # (float) box loss gain
95
+ cls: 0.5 # (float) cls loss gain (scale with pixels)
96
+ dfl: 1.5 # (float) dfl loss gain
97
+ pose: 12.0 # (float) pose loss gain
98
+ kobj: 1.0 # (float) keypoint obj loss gain
99
+ label_smoothing: 0.0 # (float) label smoothing (fraction)
100
+ nbs: 64 # (int) nominal batch size
101
+ hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
102
+ hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
103
+ hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
104
+ degrees: 0.0 # (float) image rotation (+/- deg)
105
+ translate: 0.1 # (float) image translation (+/- fraction)
106
+ scale: 0.5 # (float) image scale (+/- gain)
107
+ shear: 0.0 # (float) image shear (+/- deg)
108
+ perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
109
+ flipud: 0.0 # (float) image flip up-down (probability)
110
+ fliplr: 0.5 # (float) image flip left-right (probability)
111
+ mosaic: 1.0 # (float) image mosaic (probability)
112
+ mixup: 0.0 # (float) image mixup (probability)
113
+ copy_paste: 0.0 # (float) segment copy-paste (probability)
114
+
115
+ # Custom config.yaml ---------------------------------------------------------------------------------------------------
116
+ cfg: # (str, optional) for overriding defaults.yaml
117
+
118
+ # Tracker settings ------------------------------------------------------------------------------------------------------
119
+ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
ultralytics/cfg/models/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Models
2
+
3
+ Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.
4
+
5
+ These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.
6
+
7
+ To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.ultralytics.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
8
+
9
+ ### Usage
10
+
11
+ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
12
+
13
+ ```bash
14
+ yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
15
+ ```
16
+
17
+ They may also be used directly in a Python environment, and accepts the same [arguments](https://docs.ultralytics.com/usage/cfg/) as in the CLI example above:
18
+
19
+ ```python
20
+ from ultralytics import YOLO
21
+
22
+ model = YOLO("model.yaml") # build a YOLOv8n model from scratch
23
+ # YOLO("model.pt") use pre-trained model if available
24
+ model.info() # display model information
25
+ model.train(data="coco128.yaml", epochs=100) # train the model
26
+ ```
27
+
28
+ ## Pre-trained Model Architectures
29
+
30
+ Ultralytics supports many model architectures. Visit https://docs.ultralytics.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.
31
+
32
+ ## Contribute New Models
33
+
34
+ Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.
35
+
36
+ By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.
37
+
38
+ To get started, please consult our [Contributing Guide](https://docs.ultralytics.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!
39
+
40
+ Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 48]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [48, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [96, 512, 3]] # stage 2
17
+
18
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
19
+ - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
20
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
21
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
22
+
23
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
24
+ - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
25
+
26
+ head:
27
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
28
+ - [-1, 1, AIFI, [1024, 8]]
29
+ - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
33
+ - [[-2, -1], 1, Concat, [1]]
34
+ - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
35
+ - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
38
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
39
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
41
+
42
+ - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
43
+ - [[-1, 17], 1, Concat, [1]] # cat Y4
44
+ - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
47
+ - [[-1, 12], 1, Concat, [1]] # cat Y5
48
+ - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
49
+
50
+ - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet101 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet50 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ x: [1.00, 1.00, 2048]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 64]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [64, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [128, 512, 3]]
17
+ - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
18
+
19
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
20
+ - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
21
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
22
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
23
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
24
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
25
+
26
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
27
+ - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
28
+ - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
29
+
30
+ head:
31
+ - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
32
+ - [-1, 1, AIFI, [2048, 8]]
33
+ - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
37
+ - [[-2, -1], 1, Concat, [1]]
38
+ - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
39
+ - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
42
+ - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
43
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
44
+ - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
45
+
46
+ - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
47
+ - [[-1, 21], 1, Concat, [1]] # cat Y4
48
+ - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
49
+
50
+ - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
51
+ - [[-1, 16], 1, Concat, [1]] # cat Y5
52
+ - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
53
+
54
+ - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v3/yolov3-spp.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [32, 3, 1]], # 0
13
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
14
+ [-1, 1, Bottleneck, [64]],
15
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
16
+ [-1, 2, Bottleneck, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
18
+ [-1, 8, Bottleneck, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
20
+ [-1, 8, Bottleneck, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
22
+ [-1, 4, Bottleneck, [1024]], # 10
23
+ ]
24
+
25
+ # YOLOv3-SPP head
26
+ head:
27
+ [[-1, 1, Bottleneck, [1024, False]],
28
+ [-1, 1, SPP, [512, [5, 9, 13]]],
29
+ [-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Bottleneck, [512, False]],
37
+ [-1, 1, Bottleneck, [512, False]],
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
40
+
41
+ [-2, 1, Conv, [128, 1, 1]],
42
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
43
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
44
+ [-1, 1, Bottleneck, [256, False]],
45
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
46
+
47
+ [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
48
+ ]
ultralytics/cfg/models/v3/yolov3-tiny.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # YOLOv3-tiny backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [16, 3, 1]], # 0
13
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
14
+ [-1, 1, Conv, [32, 3, 1]],
15
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
16
+ [-1, 1, Conv, [64, 3, 1]],
17
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
18
+ [-1, 1, Conv, [128, 3, 1]],
19
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
20
+ [-1, 1, Conv, [256, 3, 1]],
21
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
22
+ [-1, 1, Conv, [512, 3, 1]],
23
+ [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
24
+ [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
25
+ ]
26
+
27
+ # YOLOv3-tiny head
28
+ head:
29
+ [[-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [256, 1, 1]],
31
+ [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [128, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
37
+
38
+ [[19, 15], 1, Detect, [nc]], # Detect(P4, P5)
39
+ ]
ultralytics/cfg/models/v3/yolov3.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [[-1, 1, Conv, [32, 3, 1]], # 0
13
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
14
+ [-1, 1, Bottleneck, [64]],
15
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
16
+ [-1, 2, Bottleneck, [128]],
17
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
18
+ [-1, 8, Bottleneck, [256]],
19
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
20
+ [-1, 8, Bottleneck, [512]],
21
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
22
+ [-1, 4, Bottleneck, [1024]], # 10
23
+ ]
24
+
25
+ # YOLOv3 head
26
+ head:
27
+ [[-1, 1, Bottleneck, [1024, False]],
28
+ [-1, 1, Conv, [512, 1, 1]],
29
+ [-1, 1, Conv, [1024, 3, 1]],
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
32
+
33
+ [-2, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
36
+ [-1, 1, Bottleneck, [512, False]],
37
+ [-1, 1, Bottleneck, [512, False]],
38
+ [-1, 1, Conv, [256, 1, 1]],
39
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
40
+
41
+ [-2, 1, Conv, [128, 1, 1]],
42
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
43
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
44
+ [-1, 1, Bottleneck, [256, False]],
45
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
46
+
47
+ [[27, 22, 15], 1, Detect, [nc]], # Detect(P3, P4, P5)
48
+ ]
ultralytics/cfg/models/v5/yolov5-p6.yaml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.ultralytics.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head:
33
+ [[-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc]], # Detect(P3, P4, P5, P6)
61
+ ]
ultralytics/cfg/models/v5/yolov5.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ [[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [1024]],
26
+ [-1, 1, SPPF, [1024, 5]], # 9
27
+ ]
28
+
29
+ # YOLOv5 v6.0 head
30
+ head:
31
+ [[-1, 1, Conv, [512, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
33
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
34
+ [-1, 3, C3, [512, False]], # 13
35
+
36
+ [-1, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
39
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
40
+
41
+ [-1, 1, Conv, [256, 3, 2]],
42
+ [[-1, 14], 1, Concat, [1]], # cat head P4
43
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
44
+
45
+ [-1, 1, Conv, [512, 3, 2]],
46
+ [[-1, 10], 1, Concat, [1]], # cat head P5
47
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
48
+
49
+ [[17, 20, 23], 1, Detect, [nc]], # Detect(P3, P4, P5)
50
+ ]
ultralytics/cfg/models/v6/yolov6.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/models/yolov6
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ activation: nn.ReLU() # (optional) model default activation function
7
+ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv6-3.0s backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 6, Conv, [128, 3, 1]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 12, Conv, [256, 3, 1]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 18, Conv, [512, 3, 1]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 6, Conv, [1024, 3, 1]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv6-3.0s head
30
+ head:
31
+ - [-1, 1, Conv, [256, 1, 1]]
32
+ - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 1, Conv, [256, 3, 1]]
35
+ - [-1, 9, Conv, [256, 3, 1]] # 14
36
+
37
+ - [-1, 1, Conv, [128, 1, 1]]
38
+ - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
39
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
40
+ - [-1, 1, Conv, [128, 3, 1]]
41
+ - [-1, 9, Conv, [128, 3, 1]] # 19
42
+
43
+ - [-1, 1, Conv, [128, 3, 2]]
44
+ - [[-1, 15], 1, Concat, [1]] # cat head P4
45
+ - [-1, 1, Conv, [256, 3, 1]]
46
+ - [-1, 9, Conv, [256, 3, 1]] # 23
47
+
48
+ - [-1, 1, Conv, [256, 3, 2]]
49
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
50
+ - [-1, 1, Conv, [512, 3, 1]]
51
+ - [-1, 9, Conv, [512, 3, 1]] # 27
52
+
53
+ - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-cls.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.ultralytics.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+
27
+ # YOLOv8.0n head
28
+ head:
29
+ - [-1, 1, Classify, [nc]] # Classify
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-ghost-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C3Ghost, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, GhostConv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, GhostConv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, GhostConv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [768, True]]
26
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3Ghost, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0-ghost-p6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C3Ghost, [768]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C3Ghost, [512]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, GhostConv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, GhostConv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, GhostConv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-ghost.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+ # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
4
+
5
+ # Parameters
6
+ nc: 80 # number of classes
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
10
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
11
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
12
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
13
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs
14
+
15
+ # YOLOv8.0n-ghost backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C3Ghost, [128, True]]
21
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C3Ghost, [256, True]]
23
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C3Ghost, [512, True]]
25
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C3Ghost, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3Ghost, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, GhostConv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, GhostConv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, Conv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C2f, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C2f, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, Conv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C2f, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0x6 backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [768, True]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
28
+ - [-1, 3, C2f, [1024, True]]
29
+ - [-1, 1, SPPF, [1024, 5]] # 11
30
+
31
+ # YOLOv8.0x6 head
32
+ head:
33
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C2, [768, False]] # 14
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
38
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
39
+ - [-1, 3, C2, [512, False]] # 17
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
42
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
44
+
45
+ - [-1, 1, Conv, [256, 3, 2]]
46
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
47
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
48
+
49
+ - [-1, 1, Conv, [512, 3, 2]]
50
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
51
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
52
+
53
+ - [-1, 1, Conv, [768, 3, 2]]
54
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
55
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
56
+
57
+ - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-pose.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.ultralytics.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0n backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C2f, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, Conv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, Conv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8-seg-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5, P6)
ultralytics/cfg/models/v8/yolov8-seg.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.ultralytics.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/v8/yolov8.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/trackers/botsort.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
3
+
4
+ tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas (for tracker evaluation; not used for now)
11
+ # mot20: False # for tracker evaluation (not used for now)
12
+
13
+ # BoT-SORT settings
14
+ gmc_method: sparseOptFlow # method of global motion compensation
15
+ # ReID model related thresh (not supported yet)
16
+ proximity_thresh: 0.5
17
+ appearance_thresh: 0.25
18
+ with_reid: False
ultralytics/cfg/trackers/bytetrack.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
3
+
4
+ tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas (for tracker evaluation; not used for now)
11
+ # mot20: False # for tracker evaluation (not used for now)
ultralytics/data/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .base import BaseDataset
4
+ from .build import build_dataloader, build_yolo_dataset, load_inference_source
5
+ from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
6
+
7
+ __all__ = ('BaseDataset', 'ClassificationDataset', 'SemanticDataset', 'YOLODataset', 'build_yolo_dataset',
8
+ 'build_dataloader', 'load_inference_source')
ultralytics/data/annotator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from pathlib import Path
4
+
5
+ from ultralytics import SAM, YOLO
6
+
7
+
8
+ def auto_annotate(data, det_model='yolov8x.pt', sam_model='sam_b.pt', device='', output_dir=None):
9
+ """
10
+ Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
11
+
12
+ Args:
13
+ data (str): Path to a folder containing images to be annotated.
14
+ det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
15
+ sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
16
+ device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
17
+ output_dir (str | None | optional): Directory to save the annotated results.
18
+ Defaults to a 'labels' folder in the same directory as 'data'.
19
+
20
+ Example:
21
+ ```python
22
+ from ultralytics.data.annotator import auto_annotate
23
+
24
+ auto_annotate(data='ultralytics/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
25
+ ```
26
+ """
27
+ det_model = YOLO(det_model)
28
+ sam_model = SAM(sam_model)
29
+
30
+ data = Path(data)
31
+ if not output_dir:
32
+ output_dir = data.parent / f'{data.stem}_auto_annotate_labels'
33
+ Path(output_dir).mkdir(exist_ok=True, parents=True)
34
+
35
+ det_results = det_model(data, stream=True, device=device)
36
+
37
+ for result in det_results:
38
+ class_ids = result.boxes.cls.int().tolist() # noqa
39
+ if len(class_ids):
40
+ boxes = result.boxes.xyxy # Boxes object for bbox outputs
41
+ sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
42
+ segments = sam_results[0].masks.xyn # noqa
43
+
44
+ with open(f'{str(Path(output_dir) / Path(result.path).stem)}.txt', 'w') as f:
45
+ for i in range(len(segments)):
46
+ s = segments[i]
47
+ if len(s) == 0:
48
+ continue
49
+ segment = map(str, segments[i].reshape(-1).tolist())
50
+ f.write(f'{class_ids[i]} ' + ' '.join(segment) + '\n')
ultralytics/data/augment.py ADDED
@@ -0,0 +1,1107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import math
4
+ import random
5
+ from copy import deepcopy
6
+
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ import torchvision.transforms as T
11
+
12
+ from ultralytics.utils import LOGGER, colorstr
13
+ from ultralytics.utils.checks import check_version
14
+ from ultralytics.utils.instance import Instances
15
+ from ultralytics.utils.metrics import bbox_ioa
16
+ from ultralytics.utils.ops import segment2box
17
+
18
+ from .utils import polygons2masks, polygons2masks_overlap
19
+
20
+
21
+ # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
22
+ class BaseTransform:
23
+ """
24
+ Base class for image transformations.
25
+
26
+ This is a generic transformation class that can be extended for specific image processing needs.
27
+ The class is designed to be compatible with both classification and semantic segmentation tasks.
28
+
29
+ Methods:
30
+ __init__: Initializes the BaseTransform object.
31
+ apply_image: Applies image transformation to labels.
32
+ apply_instances: Applies transformations to object instances in labels.
33
+ apply_semantic: Applies semantic segmentation to an image.
34
+ __call__: Applies all label transformations to an image, instances, and semantic masks.
35
+ """
36
+
37
+ def __init__(self) -> None:
38
+ """Initializes the BaseTransform object."""
39
+ pass
40
+
41
+ def apply_image(self, labels):
42
+ """Applies image transformations to labels."""
43
+ pass
44
+
45
+ def apply_instances(self, labels):
46
+ """Applies transformations to object instances in labels."""
47
+ pass
48
+
49
+ def apply_semantic(self, labels):
50
+ """Applies semantic segmentation to an image."""
51
+ pass
52
+
53
+ def __call__(self, labels):
54
+ """Applies all label transformations to an image, instances, and semantic masks."""
55
+ self.apply_image(labels)
56
+ self.apply_instances(labels)
57
+ self.apply_semantic(labels)
58
+
59
+
60
+ class Compose:
61
+ """Class for composing multiple image transformations."""
62
+
63
+ def __init__(self, transforms):
64
+ """Initializes the Compose object with a list of transforms."""
65
+ self.transforms = transforms
66
+
67
+ def __call__(self, data):
68
+ """Applies a series of transformations to input data."""
69
+ for t in self.transforms:
70
+ data = t(data)
71
+ return data
72
+
73
+ def append(self, transform):
74
+ """Appends a new transform to the existing list of transforms."""
75
+ self.transforms.append(transform)
76
+
77
+ def tolist(self):
78
+ """Converts the list of transforms to a standard Python list."""
79
+ return self.transforms
80
+
81
+ def __repr__(self):
82
+ """Returns a string representation of the object."""
83
+ return f"{self.__class__.__name__}({', '.join([f'{t}' for t in self.transforms])})"
84
+
85
+
86
+ class BaseMixTransform:
87
+ """
88
+ Class for base mix (MixUp/Mosaic) transformations.
89
+
90
+ This implementation is from mmyolo.
91
+ """
92
+
93
+ def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
94
+ """Initializes the BaseMixTransform object with dataset, pre_transform, and probability."""
95
+ self.dataset = dataset
96
+ self.pre_transform = pre_transform
97
+ self.p = p
98
+
99
+ def __call__(self, labels):
100
+ """Applies pre-processing transforms and mixup/mosaic transforms to labels data."""
101
+ if random.uniform(0, 1) > self.p:
102
+ return labels
103
+
104
+ # Get index of one or three other images
105
+ indexes = self.get_indexes()
106
+ if isinstance(indexes, int):
107
+ indexes = [indexes]
108
+
109
+ # Get images information will be used for Mosaic or MixUp
110
+ mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
111
+
112
+ if self.pre_transform is not None:
113
+ for i, data in enumerate(mix_labels):
114
+ mix_labels[i] = self.pre_transform(data)
115
+ labels['mix_labels'] = mix_labels
116
+
117
+ # Mosaic or MixUp
118
+ labels = self._mix_transform(labels)
119
+ labels.pop('mix_labels', None)
120
+ return labels
121
+
122
+ def _mix_transform(self, labels):
123
+ """Applies MixUp or Mosaic augmentation to the label dictionary."""
124
+ raise NotImplementedError
125
+
126
+ def get_indexes(self):
127
+ """Gets a list of shuffled indexes for mosaic augmentation."""
128
+ raise NotImplementedError
129
+
130
+
131
+ class Mosaic(BaseMixTransform):
132
+ """
133
+ Mosaic augmentation.
134
+
135
+ This class performs mosaic augmentation by combining multiple (4 or 9) images into a single mosaic image.
136
+ The augmentation is applied to a dataset with a given probability.
137
+
138
+ Attributes:
139
+ dataset: The dataset on which the mosaic augmentation is applied.
140
+ imgsz (int, optional): Image size (height and width) after mosaic pipeline of a single image. Default to 640.
141
+ p (float, optional): Probability of applying the mosaic augmentation. Must be in the range 0-1. Default to 1.0.
142
+ n (int, optional): The grid size, either 4 (for 2x2) or 9 (for 3x3).
143
+ """
144
+
145
+ def __init__(self, dataset, imgsz=640, p=1.0, n=4):
146
+ """Initializes the object with a dataset, image size, probability, and border."""
147
+ assert 0 <= p <= 1.0, f'The probability should be in range [0, 1], but got {p}.'
148
+ assert n in (4, 9), 'grid must be equal to 4 or 9.'
149
+ super().__init__(dataset=dataset, p=p)
150
+ self.dataset = dataset
151
+ self.imgsz = imgsz
152
+ self.border = (-imgsz // 2, -imgsz // 2) # width, height
153
+ self.n = n
154
+
155
+ def get_indexes(self, buffer=True):
156
+ """Return a list of random indexes from the dataset."""
157
+ if buffer: # select images from buffer
158
+ return random.choices(list(self.dataset.buffer), k=self.n - 1)
159
+ else: # select any images
160
+ return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
161
+
162
+ def _mix_transform(self, labels):
163
+ """Apply mosaic transformation to the input image and labels."""
164
+ assert labels.get('rect_shape', None) is None, 'rect and mosaic are mutually exclusive.'
165
+ assert len(labels.get('mix_labels', [])), 'There are no other images for mosaic augment.'
166
+ return self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)
167
+
168
+ def _mosaic4(self, labels):
169
+ """Create a 2x2 image mosaic."""
170
+ mosaic_labels = []
171
+ s = self.imgsz
172
+ yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.border) # mosaic center x, y
173
+ for i in range(4):
174
+ labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
175
+ # Load image
176
+ img = labels_patch['img']
177
+ h, w = labels_patch.pop('resized_shape')
178
+
179
+ # Place img in img4
180
+ if i == 0: # top left
181
+ img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
182
+ x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
183
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
184
+ elif i == 1: # top right
185
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
186
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
187
+ elif i == 2: # bottom left
188
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
189
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
190
+ elif i == 3: # bottom right
191
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
192
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
193
+
194
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
195
+ padw = x1a - x1b
196
+ padh = y1a - y1b
197
+
198
+ labels_patch = self._update_labels(labels_patch, padw, padh)
199
+ mosaic_labels.append(labels_patch)
200
+ final_labels = self._cat_labels(mosaic_labels)
201
+ final_labels['img'] = img4
202
+ return final_labels
203
+
204
+ def _mosaic9(self, labels):
205
+ """Create a 3x3 image mosaic."""
206
+ mosaic_labels = []
207
+ s = self.imgsz
208
+ hp, wp = -1, -1 # height, width previous
209
+ for i in range(9):
210
+ labels_patch = labels if i == 0 else labels['mix_labels'][i - 1]
211
+ # Load image
212
+ img = labels_patch['img']
213
+ h, w = labels_patch.pop('resized_shape')
214
+
215
+ # Place img in img9
216
+ if i == 0: # center
217
+ img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 9 tiles
218
+ h0, w0 = h, w
219
+ c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
220
+ elif i == 1: # top
221
+ c = s, s - h, s + w, s
222
+ elif i == 2: # top right
223
+ c = s + wp, s - h, s + wp + w, s
224
+ elif i == 3: # right
225
+ c = s + w0, s, s + w0 + w, s + h
226
+ elif i == 4: # bottom right
227
+ c = s + w0, s + hp, s + w0 + w, s + hp + h
228
+ elif i == 5: # bottom
229
+ c = s + w0 - w, s + h0, s + w0, s + h0 + h
230
+ elif i == 6: # bottom left
231
+ c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
232
+ elif i == 7: # left
233
+ c = s - w, s + h0 - h, s, s + h0
234
+ elif i == 8: # top left
235
+ c = s - w, s + h0 - hp - h, s, s + h0 - hp
236
+
237
+ padw, padh = c[:2]
238
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
239
+
240
+ # Image
241
+ img9[y1:y2, x1:x2] = img[y1 - padh:, x1 - padw:] # img9[ymin:ymax, xmin:xmax]
242
+ hp, wp = h, w # height, width previous for next iteration
243
+
244
+ # Labels assuming imgsz*2 mosaic size
245
+ labels_patch = self._update_labels(labels_patch, padw + self.border[0], padh + self.border[1])
246
+ mosaic_labels.append(labels_patch)
247
+ final_labels = self._cat_labels(mosaic_labels)
248
+
249
+ final_labels['img'] = img9[-self.border[0]:self.border[0], -self.border[1]:self.border[1]]
250
+ return final_labels
251
+
252
+ @staticmethod
253
+ def _update_labels(labels, padw, padh):
254
+ """Update labels."""
255
+ nh, nw = labels['img'].shape[:2]
256
+ labels['instances'].convert_bbox(format='xyxy')
257
+ labels['instances'].denormalize(nw, nh)
258
+ labels['instances'].add_padding(padw, padh)
259
+ return labels
260
+
261
+ def _cat_labels(self, mosaic_labels):
262
+ """Return labels with mosaic border instances clipped."""
263
+ if len(mosaic_labels) == 0:
264
+ return {}
265
+ cls = []
266
+ instances = []
267
+ imgsz = self.imgsz * 2 # mosaic imgsz
268
+ for labels in mosaic_labels:
269
+ cls.append(labels['cls'])
270
+ instances.append(labels['instances'])
271
+ final_labels = {
272
+ 'im_file': mosaic_labels[0]['im_file'],
273
+ 'ori_shape': mosaic_labels[0]['ori_shape'],
274
+ 'resized_shape': (imgsz, imgsz),
275
+ 'cls': np.concatenate(cls, 0),
276
+ 'instances': Instances.concatenate(instances, axis=0),
277
+ 'mosaic_border': self.border} # final_labels
278
+ final_labels['instances'].clip(imgsz, imgsz)
279
+ good = final_labels['instances'].remove_zero_area_boxes()
280
+ final_labels['cls'] = final_labels['cls'][good]
281
+ return final_labels
282
+
283
+
284
+ class MixUp(BaseMixTransform):
285
+ """Class for applying MixUp augmentation to the dataset."""
286
+
287
+ def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
288
+ """Initializes MixUp object with dataset, pre_transform, and probability of applying MixUp."""
289
+ super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
290
+
291
+ def get_indexes(self):
292
+ """Get a random index from the dataset."""
293
+ return random.randint(0, len(self.dataset) - 1)
294
+
295
+ def _mix_transform(self, labels):
296
+ """Applies MixUp augmentation as per https://arxiv.org/pdf/1710.09412.pdf."""
297
+ r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
298
+ labels2 = labels['mix_labels'][0]
299
+ labels['img'] = (labels['img'] * r + labels2['img'] * (1 - r)).astype(np.uint8)
300
+ labels['instances'] = Instances.concatenate([labels['instances'], labels2['instances']], axis=0)
301
+ labels['cls'] = np.concatenate([labels['cls'], labels2['cls']], 0)
302
+ return labels
303
+
304
+
305
+ class RandomPerspective:
306
+ """
307
+ Implements random perspective and affine transformations on images and corresponding bounding boxes, segments, and
308
+ keypoints. These transformations include rotation, translation, scaling, and shearing. The class also offers the
309
+ option to apply these transformations conditionally with a specified probability.
310
+
311
+ Attributes:
312
+ degrees (float): Degree range for random rotations.
313
+ translate (float): Fraction of total width and height for random translation.
314
+ scale (float): Scaling factor interval, e.g., a scale factor of 0.1 allows a resize between 90%-110%.
315
+ shear (float): Shear intensity (angle in degrees).
316
+ perspective (float): Perspective distortion factor.
317
+ border (tuple): Tuple specifying mosaic border.
318
+ pre_transform (callable): A function/transform to apply to the image before starting the random transformation.
319
+
320
+ Methods:
321
+ affine_transform(img, border): Applies a series of affine transformations to the image.
322
+ apply_bboxes(bboxes, M): Transforms bounding boxes using the calculated affine matrix.
323
+ apply_segments(segments, M): Transforms segments and generates new bounding boxes.
324
+ apply_keypoints(keypoints, M): Transforms keypoints.
325
+ __call__(labels): Main method to apply transformations to both images and their corresponding annotations.
326
+ box_candidates(box1, box2): Filters out bounding boxes that don't meet certain criteria post-transformation.
327
+ """
328
+
329
+ def __init__(self,
330
+ degrees=0.0,
331
+ translate=0.1,
332
+ scale=0.5,
333
+ shear=0.0,
334
+ perspective=0.0,
335
+ border=(0, 0),
336
+ pre_transform=None):
337
+ """Initializes RandomPerspective object with transformation parameters."""
338
+
339
+ self.degrees = degrees
340
+ self.translate = translate
341
+ self.scale = scale
342
+ self.shear = shear
343
+ self.perspective = perspective
344
+ self.border = border # mosaic border
345
+ self.pre_transform = pre_transform
346
+
347
+ def affine_transform(self, img, border):
348
+ """
349
+ Applies a sequence of affine transformations centered around the image center.
350
+
351
+ Args:
352
+ img (ndarray): Input image.
353
+ border (tuple): Border dimensions.
354
+
355
+ Returns:
356
+ img (ndarray): Transformed image.
357
+ M (ndarray): Transformation matrix.
358
+ s (float): Scale factor.
359
+ """
360
+
361
+ # Center
362
+ C = np.eye(3, dtype=np.float32)
363
+
364
+ C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
365
+ C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
366
+
367
+ # Perspective
368
+ P = np.eye(3, dtype=np.float32)
369
+ P[2, 0] = random.uniform(-self.perspective, self.perspective) # x perspective (about y)
370
+ P[2, 1] = random.uniform(-self.perspective, self.perspective) # y perspective (about x)
371
+
372
+ # Rotation and Scale
373
+ R = np.eye(3, dtype=np.float32)
374
+ a = random.uniform(-self.degrees, self.degrees)
375
+ # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
376
+ s = random.uniform(1 - self.scale, 1 + self.scale)
377
+ # s = 2 ** random.uniform(-scale, scale)
378
+ R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
379
+
380
+ # Shear
381
+ S = np.eye(3, dtype=np.float32)
382
+ S[0, 1] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # x shear (deg)
383
+ S[1, 0] = math.tan(random.uniform(-self.shear, self.shear) * math.pi / 180) # y shear (deg)
384
+
385
+ # Translation
386
+ T = np.eye(3, dtype=np.float32)
387
+ T[0, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[0] # x translation (pixels)
388
+ T[1, 2] = random.uniform(0.5 - self.translate, 0.5 + self.translate) * self.size[1] # y translation (pixels)
389
+
390
+ # Combined rotation matrix
391
+ M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
392
+ # Affine image
393
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
394
+ if self.perspective:
395
+ img = cv2.warpPerspective(img, M, dsize=self.size, borderValue=(114, 114, 114))
396
+ else: # affine
397
+ img = cv2.warpAffine(img, M[:2], dsize=self.size, borderValue=(114, 114, 114))
398
+ return img, M, s
399
+
400
def apply_bboxes(self, bboxes, M):
    """
    Apply an affine/perspective transform to bounding boxes.

    Args:
        bboxes (ndarray): boxes in xyxy format, shape (num_bboxes, 4).
        M (ndarray): 3x3 transform matrix.

    Returns:
        (ndarray): transformed boxes, xyxy format, shape (num_bboxes, 4).
    """
    n = len(bboxes)
    if n == 0:
        return bboxes

    # Homogeneous coordinates for all four corners of every box
    corners = np.ones((n * 4, 3), dtype=bboxes.dtype)
    corners[:, :2] = bboxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
    corners = corners @ M.T  # transform
    if self.perspective:
        pts = (corners[:, :2] / corners[:, 2:3]).reshape(n, 8)  # perspective rescale
    else:
        pts = corners[:, :2].reshape(n, 8)  # affine

    # Axis-aligned envelope of the warped corners
    xs = pts[:, [0, 2, 4, 6]]
    ys = pts[:, [1, 3, 5, 7]]
    return np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), dtype=bboxes.dtype).reshape(4, n).T
424
+
425
def apply_segments(self, segments, M):
    """
    Apply an affine/perspective transform to segments and derive new boxes from them.

    Args:
        segments (ndarray): segments, shape (num_samples, 500, 2).
        M (ndarray): 3x3 transform matrix.

    Returns:
        bboxes (ndarray): boxes recomputed from the warped segments, shape (N, 4).
        segments (ndarray): warped segments, shape (num_samples, 500, 2).
    """
    n, num = segments.shape[:2]
    if n == 0:
        return [], segments

    points = np.ones((n * num, 3), dtype=segments.dtype)
    points[:, :2] = segments.reshape(-1, 2)
    points = points @ M.T  # transform
    points = points[:, :2] / points[:, 2:3]  # homogeneous -> cartesian (no-op scale for affine)
    segments = points.reshape(n, -1, 2)
    # Re-derive a tight box from each warped polygon, clipped to the output canvas
    bboxes = np.stack([segment2box(seg, self.size[0], self.size[1]) for seg in segments], 0)
    return bboxes, segments
449
+
450
def apply_keypoints(self, keypoints, M):
    """
    Apply an affine/perspective transform to keypoints.

    Args:
        keypoints (ndarray): keypoints, shape (N, 17, 3) as (x, y, visibility).
        M (ndarray): 3x3 transform matrix.

    Returns:
        (ndarray): transformed keypoints, shape (N, 17, 3); points warped outside the
            output canvas are marked invisible.
    """
    n, nkpt = keypoints.shape[:2]
    if n == 0:
        return keypoints
    pts = np.ones((n * nkpt, 3), dtype=keypoints.dtype)
    visible = keypoints[..., 2].reshape(n * nkpt, 1)
    pts[:, :2] = keypoints[..., :2].reshape(n * nkpt, 2)
    pts = pts @ M.T  # transform
    pts = pts[:, :2] / pts[:, 2:3]  # perspective rescale (divides by 1 for affine)
    # Hide keypoints that left the image canvas
    outside = (pts[:, 0] < 0) | (pts[:, 1] < 0) | (pts[:, 0] > self.size[0]) | (pts[:, 1] > self.size[1])
    visible[outside] = 0
    return np.concatenate([pts, visible], axis=-1).reshape(n, nkpt, 3)
472
+
473
def __call__(self, labels):
    """
    Apply the random perspective/affine pipeline to an image and its targets.

    Args:
        labels (dict): must contain 'img', 'cls' and an 'instances' object (bboxes and
            optionally segments/keypoints); may contain 'mosaic_border' and 'ratio_pad'.

    Returns:
        (dict): labels with the warped 'img', filtered 'instances'/'cls', and
            'resized_shape' set to the warped image shape.
    """
    # Mosaic output is already letterboxed, so only pre-transform non-mosaic samples
    if self.pre_transform and 'mosaic_border' not in labels:
        labels = self.pre_transform(labels)
    labels.pop('ratio_pad', None)  # do not need ratio pad

    img = labels['img']
    cls = labels['cls']
    instances = labels.pop('instances')
    # Make sure the coord formats are right
    instances.convert_bbox(format='xyxy')
    instances.denormalize(*img.shape[:2][::-1])

    border = labels.pop('mosaic_border', self.border)
    # Output canvas size; negative mosaic borders shrink the canvas back to imgsz
    self.size = img.shape[1] + border[1] * 2, img.shape[0] + border[0] * 2  # w, h
    # M is affine matrix
    # Scale for func:`box_candidates`
    img, M, scale = self.affine_transform(img, border)

    bboxes = self.apply_bboxes(instances.bboxes, M)

    segments = instances.segments
    keypoints = instances.keypoints
    # Update bboxes if there are segments (tighter boxes re-derived from warped polygons)
    if len(segments):
        bboxes, segments = self.apply_segments(segments, M)

    if keypoints is not None:
        keypoints = self.apply_keypoints(keypoints, M)
    new_instances = Instances(bboxes, segments, keypoints, bbox_format='xyxy', normalized=False)
    # Clip
    new_instances.clip(*self.size)

    # Filter instances
    instances.scale(scale_w=scale, scale_h=scale, bbox_only=True)
    # Make the bboxes have the same scale with new_bboxes for a fair area comparison
    i = self.box_candidates(box1=instances.bboxes.T,
                            box2=new_instances.bboxes.T,
                            area_thr=0.01 if len(segments) else 0.10)
    labels['instances'] = new_instances[i]
    labels['cls'] = cls[i]
    labels['img'] = img
    labels['resized_shape'] = img.shape[:2]
    return labels
522
+
523
def box_candidates(self, box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):
    """
    Select boxes that survive augmentation well enough to keep as training targets.

    A candidate must be at least `wh_thr` pixels in both dimensions, retain more than
    `area_thr` of its pre-augmentation area, and have an aspect ratio below `ar_thr`.

    Args:
        box1 (numpy.ndarray): The 4,n bounding box before augmentation, represented as [x1, y1, x2, y2].
        box2 (numpy.ndarray): The 4,n bounding box after augmentation, represented as [x1, y1, x2, y2].
        wh_thr (float, optional): The width and height threshold in pixels. Default is 2.
        ar_thr (float, optional): The aspect ratio threshold. Default is 100.
        area_thr (float, optional): The area ratio threshold. Default is 0.1.
        eps (float, optional): A small epsilon value to prevent division by zero. Default is 1e-16.

    Returns:
        (numpy.ndarray): A boolean array indicating which boxes are candidates.
    """
    w_before, h_before = box1[2] - box1[0], box1[3] - box1[1]
    w_after, h_after = box2[2] - box2[0], box2[3] - box2[1]
    aspect = np.maximum(w_after / (h_after + eps), h_after / (w_after + eps))  # aspect ratio
    big_enough = (w_after > wh_thr) & (h_after > wh_thr)
    area_kept = w_after * h_after / (w_before * h_before + eps) > area_thr
    return big_enough & area_kept & (aspect < ar_thr)  # candidates
543
+
544
+
545
class RandomHSV:
    """
    Randomly jitters the Hue, Saturation, and Value (HSV) channels of an image.

    The magnitude of each perturbation is drawn uniformly within the limits set by
    `hgain`, `sgain`, and `vgain`.
    """

    def __init__(self, hgain=0.5, sgain=0.5, vgain=0.5) -> None:
        """
        Initialize RandomHSV with per-channel gain limits.

        Args:
            hgain (float, optional): Maximum variation for hue. Default is 0.5.
            sgain (float, optional): Maximum variation for saturation. Default is 0.5.
            vgain (float, optional): Maximum variation for value. Default is 0.5.
        """
        self.hgain = hgain
        self.sgain = sgain
        self.vgain = vgain

    def __call__(self, labels):
        """
        Apply a random HSV shift to labels['img'] in place and return labels.

        A no-op when all three gains are zero.
        """
        img = labels['img']
        if self.hgain or self.sgain or self.vgain:
            gains = np.random.uniform(-1, 1, 3) * [self.hgain, self.sgain, self.vgain] + 1  # random gains
            hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
            dtype = img.dtype  # uint8

            x = np.arange(0, 256, dtype=gains.dtype)
            lut_hue = ((x * gains[0]) % 180).astype(dtype)  # OpenCV 8-bit hue range is [0, 180)
            lut_sat = np.clip(x * gains[1], 0, 255).astype(dtype)
            lut_val = np.clip(x * gains[2], 0, 255).astype(dtype)

            im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
            cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=img)  # writes back into img, no return needed
        return labels
586
+
587
+
588
class RandomFlip:
    """
    Flips an image horizontally or vertically with a given probability.

    Any attached instances (bounding boxes, keypoints, etc.) are updated to match.
    """

    def __init__(self, p=0.5, direction='horizontal', flip_idx=None) -> None:
        """
        Initialize RandomFlip.

        Args:
            p (float, optional): Probability of flipping, in [0, 1]. Default is 0.5.
            direction (str, optional): 'horizontal' or 'vertical'. Default is 'horizontal'.
            flip_idx (array-like, optional): Keypoint index remapping applied on horizontal flips.
        """
        assert direction in ['horizontal', 'vertical'], f'Support direction `horizontal` or `vertical`, got {direction}'
        assert 0 <= p <= 1.0

        self.p = p
        self.direction = direction
        self.flip_idx = flip_idx

    def __call__(self, labels):
        """
        Possibly flip labels['img'] and update labels['instances'] accordingly.

        Args:
            labels (dict): contains 'img' (the image) and 'instances' (boxes and
                optionally keypoints).

        Returns:
            (dict): the same dict with the flipped image and updated instances.
        """
        img = labels['img']
        instances = labels.pop('instances')
        instances.convert_bbox(format='xywh')
        rows, cols = img.shape[:2]
        # Normalized coords mirror around 1.0 rather than the pixel extent
        rows = 1 if instances.normalized else rows
        cols = 1 if instances.normalized else cols

        if self.direction == 'vertical' and random.random() < self.p:
            img = np.flipud(img)  # flip up-down
            instances.flipud(rows)
        if self.direction == 'horizontal' and random.random() < self.p:
            img = np.fliplr(img)  # flip left-right
            instances.fliplr(cols)
            # Remap left/right keypoints after a horizontal flip
            if self.flip_idx is not None and instances.keypoints is not None:
                instances.keypoints = np.ascontiguousarray(instances.keypoints[:, self.flip_idx, :])
        labels['img'] = np.ascontiguousarray(img)
        labels['instances'] = instances
        return labels
643
+
644
+
645
class LetterBox:
    """Resize image and padding for detection, instance segmentation, pose."""

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
        """
        Initialize LetterBox object with specific parameters.

        Args:
            new_shape (tuple | int): target (height, width) after letterboxing.
            auto (bool): if True, pad only to the nearest `stride` multiple (minimum rectangle).
            scaleFill (bool): if True, stretch to `new_shape` with no padding.
            scaleup (bool): if False, only downscale (better val mAP).
            center (bool): if True, center the image; otherwise place it top-left.
            stride (int): model stride, used when `auto` is True.
        """
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride
        self.center = center  # Put the image in the middle or top-left

    def __call__(self, labels=None, image=None):
        """
        Return updated labels and image with added border.

        Can be called either with a `labels` dict (returns the updated dict) or with a
        bare `image` / empty labels (returns just the letterboxed image).
        """
        if labels is None:
            labels = {}
        img = labels.get('img') if image is None else image
        shape = img.shape[:2]  # current shape [height, width]
        # Rect-training shape takes priority over the configured one
        new_shape = labels.pop('rect_shape', self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        if self.center:
            dw /= 2  # divide padding into 2 sides
            dh /= 2

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # -0.1/+0.1 rounding splits an odd padding total consistently between the sides
        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
                                 value=(114, 114, 114))  # add border
        if labels.get('ratio_pad'):
            labels['ratio_pad'] = (labels['ratio_pad'], (left, top))  # for evaluation

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels['img'] = img
            labels['resized_shape'] = new_shape
            return labels
        else:
            return img

    def _update_labels(self, labels, ratio, padw, padh):
        """Scale instances to the letterboxed image and offset them by the padding."""
        labels['instances'].convert_bbox(format='xyxy')
        labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
        labels['instances'].scale(*ratio)
        labels['instances'].add_padding(padw, padh)
        return labels
711
+
712
+
713
class CopyPaste:
    """
    Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
    responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
    """

    def __init__(self, p=0.5) -> None:
        """
        Initializes the CopyPaste class with a given probability.

        Args:
            p (float, optional): The probability of applying the Copy-Paste augmentation. Must be between 0 and 1.
                Default is 0.5.
        """
        self.p = p

    def __call__(self, labels):
        """
        Applies the Copy-Paste augmentation to the given image and instances.

        Args:
            labels (dict): A dictionary containing:
                - 'img': The image to augment.
                - 'cls': Class labels associated with the instances.
                - 'instances': Object containing bounding boxes, and optionally, keypoints and segments.

        Returns:
            (dict): Dict with augmented image and updated instances under the 'img', 'cls', and 'instances' keys.

        Notes:
            1. Instances are expected to have 'segments' as one of their attributes for this augmentation to work.
            2. This method modifies the input dictionary 'labels' in place.
        """
        im = labels['img']
        cls = labels['cls']
        h, w = im.shape[:2]
        instances = labels.pop('instances')
        instances.convert_bbox(format='xyxy')
        instances.denormalize(w, h)
        if self.p and len(instances.segments):
            n = len(instances)
            _, w, _ = im.shape  # height, width, channels
            im_new = np.zeros(im.shape, np.uint8)  # paste mask, drawn in mirrored orientation

            # Calculate ioa first then select indexes randomly
            ins_flip = deepcopy(instances)
            ins_flip.fliplr(w)

            ioa = bbox_ioa(ins_flip.bboxes, instances.bboxes)  # intersection over area, (N, M)
            # Keep only mirrored instances that overlap every existing object < 30%
            indexes = np.nonzero((ioa < 0.30).all(1))[0]  # (N, )
            n = len(indexes)
            # Paste a random subset of the candidates; expected fraction is self.p
            for j in random.sample(list(indexes), k=round(self.p * n)):
                cls = np.concatenate((cls, cls[[j]]), axis=0)
                instances = Instances.concatenate((instances, ins_flip[[j]]), axis=0)
                cv2.drawContours(im_new, instances.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED)

            result = cv2.flip(im, 1)  # augment segments (flip left-right)
            i = cv2.flip(im_new, 1).astype(bool)  # paste mask mapped back to the original orientation
            im[i] = result[i]

        labels['img'] = im
        labels['cls'] = cls
        labels['instances'] = instances
        return labels
777
+
778
+
779
class Albumentations:
    """
    Albumentations transformations.

    Optional, uninstall package to disable. Applies Blur, Median Blur, convert to grayscale, Contrast Limited Adaptive
    Histogram Equalization, random change of brightness and contrast, RandomGamma and lowering of image quality by
    compression.
    """

    def __init__(self, p=1.0):
        """
        Initialize the transform object for YOLO bbox formatted params.

        Args:
            p (float): probability of applying the composed pipeline on each call.
        """
        self.p = p
        self.transform = None  # remains None if albumentations is missing or fails to load
        prefix = colorstr('albumentations: ')
        try:
            import albumentations as A

            check_version(A.__version__, '1.0.3', hard=True)  # version requirement

            # Pixel-level transforms only; boxes pass through unchanged
            T = [
                A.Blur(p=0.01),
                A.MedianBlur(p=0.01),
                A.ToGray(p=0.01),
                A.CLAHE(p=0.01),
                A.RandomBrightnessContrast(p=0.0),
                A.RandomGamma(p=0.0),
                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            # Best-effort: any other failure disables the pipeline but is logged
            LOGGER.info(f'{prefix}{e}')

    def __call__(self, labels):
        """
        Generates object detections and returns a dictionary with detection results.

        Applies the composed pipeline with probability `self.p`; the update is skipped
        when the transformed image no longer contains any boxes.
        """
        im = labels['img']
        cls = labels['cls']
        if len(cls):
            labels['instances'].convert_bbox('xywh')
            labels['instances'].normalize(*im.shape[:2][::-1])
            bboxes = labels['instances'].bboxes
            # TODO: add supports of segments and keypoints
            if self.transform and random.random() < self.p:
                new = self.transform(image=im, bboxes=bboxes, class_labels=cls)  # transformed
                if len(new['class_labels']) > 0:  # skip update if no bbox in new im
                    labels['img'] = new['image']
                    labels['cls'] = np.array(new['class_labels'])
                    bboxes = np.array(new['bboxes'], dtype=np.float32)
                    labels['instances'].update(bboxes=bboxes)
        return labels
831
+
832
+
833
+ # TODO: technically this is not an augmentation, maybe we should put this to another files
834
class Format:
    """
    Formats image annotations for object detection, instance segmentation, and pose estimation tasks. The class
    standardizes the image and instance annotations to be used by the `collate_fn` in PyTorch DataLoader.

    Attributes:
        bbox_format (str): Format for bounding boxes. Default is 'xywh'.
        normalize (bool): Whether to normalize bounding boxes. Default is True.
        return_mask (bool): Return instance masks for segmentation. Default is False.
        return_keypoint (bool): Return keypoints for pose estimation. Default is False.
        mask_ratio (int): Downsample ratio for masks. Default is 4.
        mask_overlap (bool): Whether to overlap masks. Default is True.
        batch_idx (bool): Keep batch indexes. Default is True.
    """

    def __init__(self,
                 bbox_format='xywh',
                 normalize=True,
                 return_mask=False,
                 return_keypoint=False,
                 mask_ratio=4,
                 mask_overlap=True,
                 batch_idx=True):
        """Initializes the Format class with given parameters."""
        self.bbox_format = bbox_format
        self.normalize = normalize
        self.return_mask = return_mask  # set False when training detection only
        self.return_keypoint = return_keypoint
        self.mask_ratio = mask_ratio
        self.mask_overlap = mask_overlap
        self.batch_idx = batch_idx  # keep the batch indexes

    def __call__(self, labels):
        """Return formatted image, classes, bounding boxes & keypoints to be used by 'collate_fn'."""
        img = labels.pop('img')
        h, w = img.shape[:2]
        cls = labels.pop('cls')
        instances = labels.pop('instances')
        instances.convert_bbox(format=self.bbox_format)
        instances.denormalize(w, h)
        nl = len(instances)

        if self.return_mask:
            if nl:
                masks, instances, cls = self._format_segments(instances, cls, w, h)
                masks = torch.from_numpy(masks)
            else:
                # No instances: emit an empty mask tensor of the expected downsampled shape
                masks = torch.zeros(1 if self.mask_overlap else nl, img.shape[0] // self.mask_ratio,
                                    img.shape[1] // self.mask_ratio)
            labels['masks'] = masks
        if self.normalize:
            instances.normalize(w, h)
        labels['img'] = self._format_img(img)
        # Zero-length tensors keep the collated batch shape consistent when nl == 0
        labels['cls'] = torch.from_numpy(cls) if nl else torch.zeros(nl)
        labels['bboxes'] = torch.from_numpy(instances.bboxes) if nl else torch.zeros((nl, 4))
        if self.return_keypoint:
            labels['keypoints'] = torch.from_numpy(instances.keypoints)
        # Then we can use collate_fn
        if self.batch_idx:
            labels['batch_idx'] = torch.zeros(nl)
        return labels

    def _format_img(self, img):
        """Format the image for YOLO from Numpy array (HWC, BGR) to PyTorch tensor (CHW, RGB)."""
        if len(img.shape) < 3:
            img = np.expand_dims(img, -1)  # add a channel axis for grayscale input
        img = np.ascontiguousarray(img.transpose(2, 0, 1)[::-1])
        img = torch.from_numpy(img)
        return img

    def _format_segments(self, instances, cls, w, h):
        """
        Convert polygon points to bitmap masks.

        When `mask_overlap` is True, a single index mask is produced and instances/cls
        are reordered to match the mask sorting.
        """
        segments = instances.segments
        if self.mask_overlap:
            masks, sorted_idx = polygons2masks_overlap((h, w), segments, downsample_ratio=self.mask_ratio)
            masks = masks[None]  # (640, 640) -> (1, 640, 640)
            instances = instances[sorted_idx]
            cls = cls[sorted_idx]
        else:
            masks = polygons2masks((h, w), segments, color=1, downsample_ratio=self.mask_ratio)

        return masks, instances, cls
916
+
917
+
918
def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """
    Convert images to a size suitable for YOLOv8 training.

    Builds the full training augmentation pipeline: mosaic, copy-paste and random
    perspective as a pre-transform, followed by mixup, albumentations, HSV jitter and
    random flips.

    Args:
        dataset: dataset exposing `data` config (and `use_keypoints` for pose).
        imgsz (int): target image size.
        hyp: hyperparameter namespace with augmentation settings.
        stretch (bool): if True, skip letterboxing inside RandomPerspective.
    """
    pre_transform = Compose([
        Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
        CopyPaste(p=hyp.copy_paste),
        RandomPerspective(
            degrees=hyp.degrees,
            translate=hyp.translate,
            scale=hyp.scale,
            shear=hyp.shear,
            perspective=hyp.perspective,
            pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
        )])
    flip_idx = dataset.data.get('flip_idx', [])  # for keypoints augmentation
    if dataset.use_keypoints:
        kpt_shape = dataset.data.get('kpt_shape', None)
        if len(flip_idx) == 0 and hyp.fliplr > 0.0:
            # Horizontal flips would mislabel left/right keypoints without a flip_idx mapping
            hyp.fliplr = 0.0
            LOGGER.warning("WARNING ⚠️ No 'flip_idx' array defined in data.yaml, setting augmentation 'fliplr=0.0'")
        elif flip_idx and (len(flip_idx) != kpt_shape[0]):
            raise ValueError(f'data.yaml flip_idx={flip_idx} length must be equal to kpt_shape[0]={kpt_shape[0]}')

    return Compose([
        pre_transform,
        MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
        Albumentations(p=1.0),
        RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
        RandomFlip(direction='vertical', p=hyp.flipud),
        RandomFlip(direction='horizontal', p=hyp.fliplr, flip_idx=flip_idx)])  # transforms
947
+
948
+
949
+ # Classification augmentations -----------------------------------------------------------------------------------------
950
def classify_transforms(size=224, rect=False, mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)):  # IMAGENET_MEAN, IMAGENET_STD
    """
    Build the torchvision transform pipeline used when albumentations is not installed.

    Args:
        size (int): target square image size.
        rect (bool): if True, letterbox (rectangular) instead of center-cropping.
        mean (tuple): per-channel normalization mean.
        std (tuple): per-channel normalization std; normalization is skipped only when
            both mean and std are all zeros.

    Returns:
        (T.Compose): composed classification transforms.

    Raises:
        TypeError: if `size` is not an int.
    """
    if not isinstance(size, int):
        raise TypeError(f'classify_transforms() size {size} must be integer, not (list, tuple)')
    crop = ClassifyLetterBox(size, auto=True) if rect else CenterCrop(size)
    steps = [crop, ToTensor()]
    if any(mean) or any(std):
        steps.append(T.Normalize(mean, std, inplace=True))
    return T.Compose(steps)
958
+
959
+
960
def hsv2colorjitter(h, s, v):
    """
    Map HSV jitter magnitudes (hue, saturation, value) onto torchvision ColorJitter
    arguments (brightness, contrast, saturation, hue).

    Value drives both brightness and contrast; saturation and hue pass through.
    """
    brightness = contrast = v
    return brightness, contrast, s, h
963
+
964
+
965
def classify_albumentations(
        augment=True,
        size=224,
        scale=(0.08, 1.0),
        hflip=0.5,
        vflip=0.0,
        hsv_h=0.015,  # image HSV-Hue augmentation (fraction)
        hsv_s=0.7,  # image HSV-Saturation augmentation (fraction)
        hsv_v=0.4,  # image HSV-Value augmentation (fraction)
        mean=(0.0, 0.0, 0.0),  # IMAGENET_MEAN
        std=(1.0, 1.0, 1.0),  # IMAGENET_STD
        auto_aug=False,
):
    """
    YOLOv8 classification Albumentations (optional, only used if package is installed).

    Returns an `A.Compose` pipeline, or None when albumentations is not installed or
    fails to initialize (failures are logged, not raised).
    """
    prefix = colorstr('albumentations: ')
    try:
        import albumentations as A
        from albumentations.pytorch import ToTensorV2

        check_version(A.__version__, '1.0.3', hard=True)  # version requirement
        if augment:  # Resize and crop
            T = [A.RandomResizedCrop(height=size, width=size, scale=scale)]
            if auto_aug:
                # TODO: implement AugMix, AutoAug & RandAug in albumentations
                LOGGER.info(f'{prefix}auto augmentations are currently not supported')
            else:
                if hflip > 0:
                    T += [A.HorizontalFlip(p=hflip)]
                if vflip > 0:
                    T += [A.VerticalFlip(p=vflip)]
                if any((hsv_h, hsv_s, hsv_v)):
                    T += [A.ColorJitter(*hsv2colorjitter(hsv_h, hsv_s, hsv_v))]  # brightness, contrast, saturation, hue
        else:  # Use fixed crop for eval set (reproducibility)
            T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)]
        T += [A.Normalize(mean=mean, std=std), ToTensorV2()]  # Normalize and convert to Tensor
        LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))
        return A.Compose(T)

    except ImportError:  # package not installed, skip
        pass
    except Exception as e:
        LOGGER.info(f'{prefix}{e}')
1007
+
1008
+
1009
class ClassifyLetterBox:
    """
    YOLOv8 LetterBox class for classification preprocessing, designed to be part of a
    transformation pipeline, e.g. T.Compose([LetterBox(size), ToTensor()]).

    Attributes:
        h (int): Target height of the image.
        w (int): Target width of the image.
        auto (bool): If True, automatically solves for short side using stride.
        stride (int): The stride value, used when 'auto' is True.
    """

    def __init__(self, size=(640, 640), auto=False, stride=32):
        """
        Initializes the ClassifyLetterBox class with a target size, auto-flag, and stride.

        Args:
            size (Union[int, Tuple[int, int]]): The target dimensions (height, width); an int means a square.
            auto (bool): If True, automatically calculates the short side based on stride.
            stride (int): The stride value, used when 'auto' is True.
        """
        super().__init__()
        if isinstance(size, int):
            self.h = self.w = size
        else:
            self.h, self.w = size
        self.auto = auto  # pass max size integer, automatically solve for short side using stride
        self.stride = stride  # used with auto

    def __call__(self, im):
        """
        Resize `im` preserving aspect ratio and pad it to the target size with gray (114).

        Args:
            im (numpy.ndarray): The input image as a numpy array of shape HWC.

        Returns:
            (numpy.ndarray): The letterboxed and resized image as a numpy array.
        """
        imh, imw = im.shape[:2]
        r = min(self.h / imh, self.w / imw)  # ratio of new/old dimensions
        new_h, new_w = round(imh * r), round(imw * r)  # resized image dimensions

        # Padded canvas dimensions
        if self.auto:
            canvas_h = math.ceil(new_h / self.stride) * self.stride
            canvas_w = math.ceil(new_w / self.stride) * self.stride
        else:
            canvas_h, canvas_w = self.h, self.w
        top = round((canvas_h - new_h) / 2 - 0.1)
        left = round((canvas_w - new_w) / 2 - 0.1)

        # Paste the resized image onto a gray canvas
        im_out = np.full((canvas_h, canvas_w, 3), 114, dtype=im.dtype)
        im_out[top:top + new_h, left:left + new_w] = cv2.resize(im, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        return im_out
1057
+
1058
+
1059
class CenterCrop:
    """YOLOv8 CenterCrop class for image preprocessing, designed to be part of a transformation pipeline, e.g.,
    T.Compose([CenterCrop(size), ToTensor()]).
    """

    def __init__(self, size=640):
        """Store the target (height, width); an int `size` means a square output."""
        super().__init__()
        if isinstance(size, int):
            self.h = self.w = size
        else:
            self.h, self.w = size

    def __call__(self, im):
        """
        Crop the largest centered square from `im`, then resize it to (w, h).

        Args:
            im (numpy.ndarray): The input image as a numpy array of shape HWC.

        Returns:
            (numpy.ndarray): The center-cropped and resized image as a numpy array.
        """
        imh, imw = im.shape[:2]
        side = min(imh, imw)  # edge length of the largest centered square
        top = (imh - side) // 2
        left = (imw - side) // 2
        crop = im[top:top + side, left:left + side]
        return cv2.resize(crop, (self.w, self.h), interpolation=cv2.INTER_LINEAR)
1083
+
1084
+
1085
class ToTensor:
    """YOLOv8 ToTensor class for image preprocessing, i.e., T.Compose([LetterBox(size), ToTensor()])."""

    def __init__(self, half=False):
        """
        Initialize ToTensor.

        Args:
            half (bool): if True, output float16 instead of float32.
        """
        super().__init__()
        self.half = half

    def __call__(self, im):
        """
        Convert a numpy image into a normalized PyTorch tensor.

        Args:
            im (numpy.ndarray): Input image as a numpy array with shape (H, W, C) in BGR order.

        Returns:
            (torch.Tensor): CHW RGB tensor in float32 or float16, normalized to [0, 1].
        """
        chw = im.transpose((2, 0, 1))[::-1]  # HWC -> CHW, BGR -> RGB
        tensor = torch.from_numpy(np.ascontiguousarray(chw))
        tensor = tensor.half() if self.half else tensor.float()  # uint8 -> fp16/32
        return tensor / 255.0  # 0-255 -> 0.0-1.0
ultralytics/data/base.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ import glob
4
+ import math
5
+ import os
6
+ import random
7
+ from copy import deepcopy
8
+ from multiprocessing.pool import ThreadPool
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ import cv2
13
+ import numpy as np
14
+ import psutil
15
+ from torch.utils.data import Dataset
16
+
17
+ from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
18
+
19
+ from .utils import HELP_URL, IMG_FORMATS
20
+
21
+
22
class BaseDataset(Dataset):
    """
    Base dataset class for loading and processing image data.

    Args:
        img_path (str): Path to the folder containing images.
        imgsz (int, optional): Image size. Defaults to 640.
        cache (bool, optional): Cache images to RAM or disk during training. Defaults to False.
        augment (bool, optional): If True, data augmentation is applied. Defaults to True.
        hyp (dict, optional): Hyperparameters to apply data augmentation. Defaults to None.
        prefix (str, optional): Prefix to print in log messages. Defaults to ''.
        rect (bool, optional): If True, rectangular training is used. Defaults to False.
        batch_size (int, optional): Size of batches. Defaults to None.
        stride (int, optional): Stride. Defaults to 32.
        pad (float, optional): Padding. Defaults to 0.0.
        single_cls (bool, optional): If True, single class training is used. Defaults to False.
        classes (list): List of included classes. Default is None.
        fraction (float): Fraction of dataset to utilize. Default is 1.0 (use all data).

    Attributes:
        im_files (list): List of image file paths.
        labels (list): List of label data dictionaries.
        ni (int): Number of images in the dataset.
        ims (list): List of loaded images.
        npy_files (list): List of numpy file paths.
        transforms (callable): Image transformation function.
    """

    def __init__(self,
                 img_path,
                 imgsz=640,
                 cache=False,
                 augment=True,
                 hyp=DEFAULT_CFG,
                 prefix='',
                 rect=False,
                 batch_size=16,
                 stride=32,
                 pad=0.5,
                 single_cls=False,
                 classes=None,
                 fraction=1.0):
        """Initialize BaseDataset with given configuration and options."""
        super().__init__()
        self.img_path = img_path
        self.imgsz = imgsz
        self.augment = augment
        self.single_cls = single_cls
        self.prefix = prefix
        self.fraction = fraction
        self.im_files = self.get_img_files(self.img_path)
        self.labels = self.get_labels()  # subclass-provided (see get_labels below)
        self.update_labels(include_class=classes)  # single_cls and include_class
        self.ni = len(self.labels)  # number of images
        self.rect = rect
        self.batch_size = batch_size
        self.stride = stride
        self.pad = pad
        if self.rect:
            # Rectangular training groups images by aspect ratio per batch, so a batch size is mandatory.
            assert self.batch_size is not None
            self.set_rectangle()

        # Buffer thread for mosaic images
        self.buffer = []  # buffer size = batch size
        # Buffer holds recently loaded image indices for mosaic sampling; capped, and disabled when not augmenting.
        self.max_buffer_length = min((self.ni, self.batch_size * 8, 1000)) if self.augment else 0

        # Cache images
        # RAM caching is only attempted if the estimated footprint fits in available memory.
        if cache == 'ram' and not self.check_cache_ram():
            cache = False
        self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
        self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files]
        if cache:
            self.cache_images(cache)

        # Transforms
        self.transforms = self.build_transforms(hyp=hyp)

    def get_img_files(self, img_path):
        """Read image files."""
        try:
            f = []  # image files
            for p in img_path if isinstance(img_path, list) else [img_path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # F = list(p.rglob('*.*'))  # pathlib
                elif p.is_file():  # file
                    # Text file listing image paths, one per line; './' entries are resolved against the file's parent.
                    with open(p) as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                        # F += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise FileNotFoundError(f'{self.prefix}{p} does not exist')
            # Keep only files whose extension is a known image format; normalize separators for the current OS.
            im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS)
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS])  # pathlib
            assert im_files, f'{self.prefix}No images found in {img_path}'
        except Exception as e:
            raise FileNotFoundError(f'{self.prefix}Error loading data from {img_path}\n{HELP_URL}') from e
        if self.fraction < 1:
            # Use only the leading fraction of the (sorted) file list.
            im_files = im_files[:round(len(im_files) * self.fraction)]
        return im_files

    def update_labels(self, include_class: Optional[list]):
        """Update labels to include only these classes (optional)."""
        include_class_array = np.array(include_class).reshape(1, -1)
        for i in range(len(self.labels)):
            if include_class is not None:
                cls = self.labels[i]['cls']
                bboxes = self.labels[i]['bboxes']
                segments = self.labels[i]['segments']
                keypoints = self.labels[i]['keypoints']
                # j: boolean mask of instances whose class is in include_class.
                j = (cls == include_class_array).any(1)
                self.labels[i]['cls'] = cls[j]
                self.labels[i]['bboxes'] = bboxes[j]
                if segments:
                    # segments is a list (ragged), so filter by enumerating the mask rather than fancy-indexing.
                    self.labels[i]['segments'] = [segments[si] for si, idx in enumerate(j) if idx]
                if keypoints is not None:
                    self.labels[i]['keypoints'] = keypoints[j]
            if self.single_cls:
                # Collapse all classes to class 0 for single-class training.
                self.labels[i]['cls'][:, 0] = 0

    def load_image(self, i, rect_mode=True):
        """Loads 1 image from dataset index 'i', returns (im, resized hw)."""
        im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i]
        if im is None:  # not cached in RAM
            if fn.exists():  # load npy
                try:
                    im = np.load(fn)
                except Exception as e:
                    # Corrupt .npy cache: delete it and fall back to reading the original image file.
                    LOGGER.warning(f'{self.prefix}WARNING ⚠️ Removing corrupt *.npy image file {fn} due to: {e}')
                    Path(fn).unlink(missing_ok=True)
                    im = cv2.imread(f)  # BGR
            else:  # read image
                im = cv2.imread(f)  # BGR
            if im is None:
                raise FileNotFoundError(f'Image Not Found {f}')

            h0, w0 = im.shape[:2]  # orig hw
            if rect_mode:  # resize long side to imgsz while maintaining aspect ratio
                r = self.imgsz / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
                    im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
            elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)

            # Add to buffer if training with augmentations
            if self.augment:
                self.ims[i], self.im_hw0[i], self.im_hw[i] = im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized
                self.buffer.append(i)
                if len(self.buffer) >= self.max_buffer_length:
                    # Evict the oldest buffered index and release its cached image to bound memory use.
                    j = self.buffer.pop(0)
                    self.ims[j], self.im_hw0[j], self.im_hw[j] = None, None, None

            return im, (h0, w0), im.shape[:2]

        # Cached-in-RAM fast path.
        return self.ims[i], self.im_hw0[i], self.im_hw[i]

    def cache_images(self, cache):
        """Cache images to memory or disk."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
        fcn = self.cache_images_to_disk if cache == 'disk' else self.load_image
        with ThreadPool(NUM_THREADS) as pool:
            results = pool.imap(fcn, range(self.ni))
            # Progress bar is shown only on the main process (LOCAL_RANK <= 0) in distributed training.
            pbar = TQDM(enumerate(results), total=self.ni, disable=LOCAL_RANK > 0)
            for i, x in pbar:
                if cache == 'disk':
                    b += self.npy_files[i].stat().st_size
                else:  # 'ram'
                    self.ims[i], self.im_hw0[i], self.im_hw[i] = x  # im, hw_orig, hw_resized = load_image(self, i)
                    b += self.ims[i].nbytes
                pbar.desc = f'{self.prefix}Caching images ({b / gb:.1f}GB {cache})'
            pbar.close()

    def cache_images_to_disk(self, i):
        """Saves an image as an *.npy file for faster loading."""
        f = self.npy_files[i]
        if not f.exists():
            np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

    def check_cache_ram(self, safety_margin=0.5):
        """Check image caching requirements vs available memory."""
        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
        n = min(self.ni, 30)  # extrapolate from 30 random images
        for _ in range(n):
            im = cv2.imread(random.choice(self.im_files))  # sample image
            ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
            b += im.nbytes * ratio ** 2  # scale sampled bytes by the resize ratio squared
        mem_required = b * self.ni / n * (1 + safety_margin)  # GB required to cache dataset into RAM
        mem = psutil.virtual_memory()
        cache = mem_required < mem.available  # to cache or not to cache, that is the question
        if not cache:
            # NOTE(review): this branch only runs when cache is False, so the trailing conditional in the
            # message always renders 'not caching images ⚠️' — the '✅' alternative is unreachable here.
            LOGGER.info(f'{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images '
                        f'with {int(safety_margin * 100)}% safety margin but only '
                        f'{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, '
                        f"{'caching images ✅' if cache else 'not caching images ⚠️'}")
        return cache

    def set_rectangle(self):
        """Sets the shape of bounding boxes for YOLO detections as rectangles."""
        bi = np.floor(np.arange(self.ni) / self.batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        s = np.array([x.pop('shape') for x in self.labels])  # hw
        ar = s[:, 0] / s[:, 1]  # aspect ratio
        # Sort images by aspect ratio so each batch contains similarly-shaped images.
        irect = ar.argsort()
        self.im_files = [self.im_files[i] for i in irect]
        self.labels = [self.labels[i] for i in irect]
        ar = ar[irect]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                # All images in batch are wider than tall: shrink height proportionally.
                shapes[i] = [maxi, 1]
            elif mini > 1:
                # All images in batch are taller than wide: shrink width proportionally.
                shapes[i] = [1, 1 / mini]

        # Round batch shapes up to the nearest stride multiple, with optional padding.
        self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
        self.batch = bi  # batch index of image

    def __getitem__(self, index):
        """Returns transformed label information for given index."""
        return self.transforms(self.get_image_and_label(index))

    def get_image_and_label(self, index):
        """Get and return label information from the dataset."""
        label = deepcopy(self.labels[index])  # requires deepcopy() https://github.com/ultralytics/ultralytics/pull/1948
        label.pop('shape', None)  # shape is for rect, remove it
        label['img'], label['ori_shape'], label['resized_shape'] = self.load_image(index)
        label['ratio_pad'] = (label['resized_shape'][0] / label['ori_shape'][0],
                              label['resized_shape'][1] / label['ori_shape'][1])  # for evaluation
        if self.rect:
            label['rect_shape'] = self.batch_shapes[self.batch[index]]
        return self.update_labels_info(label)

    def __len__(self):
        """Returns the length of the labels list for the dataset."""
        return len(self.labels)

    def update_labels_info(self, label):
        """Custom your label format here."""
        return label

    def build_transforms(self, hyp=None):
        """
        Users can customize augmentations here.

        Example:
            ```python
            if self.augment:
                # Training transforms
                return Compose([])
            else:
                # Val transforms
                return Compose([])
            ```
        """
        raise NotImplementedError

    def get_labels(self):
        """
        Users can customize their own format here.

        Note:
            Ensure output is a dictionary with the following keys:
            ```python
            dict(
                im_file=im_file,
                shape=shape,  # format: (height, width)
                cls=cls,
                bboxes=bboxes, # xywh
                segments=segments,  # xy
                keypoints=keypoints, # xy
                normalized=True, # or False
                bbox_format="xyxy",  # or xywh, ltwh
            )
            ```
        """
        raise NotImplementedError