Adieee5 commited on
Commit
5bd4739
·
verified ·
1 Parent(s): 19dd704

Upload 212 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. doclayout_yolo/.DS_Store +0 -0
  2. doclayout_yolo/__init__.py +27 -0
  3. doclayout_yolo/assets/bus.jpg +3 -0
  4. doclayout_yolo/assets/zidane.jpg +0 -0
  5. doclayout_yolo/cfg/__init__.py +609 -0
  6. doclayout_yolo/cfg/datasets/d4la.yaml +45 -0
  7. doclayout_yolo/cfg/datasets/doclaynet.yaml +28 -0
  8. doclayout_yolo/cfg/datasets/docsynth300k.yaml +91 -0
  9. doclayout_yolo/cfg/default.yaml +127 -0
  10. doclayout_yolo/cfg/models/README.md +40 -0
  11. doclayout_yolo/cfg/models/rt-detr/rtdetr-l.yaml +50 -0
  12. doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet101.yaml +42 -0
  13. doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet50.yaml +42 -0
  14. doclayout_yolo/cfg/models/rt-detr/rtdetr-x.yaml +54 -0
  15. doclayout_yolo/cfg/models/v10/yolov10b.yaml +40 -0
  16. doclayout_yolo/cfg/models/v10/yolov10l.yaml +40 -0
  17. doclayout_yolo/cfg/models/v10/yolov10m-doclayout.yaml +43 -0
  18. doclayout_yolo/cfg/models/v10/yolov10m.yaml +43 -0
  19. doclayout_yolo/cfg/models/v10/yolov10n.yaml +40 -0
  20. doclayout_yolo/cfg/models/v10/yolov10s.yaml +39 -0
  21. doclayout_yolo/cfg/models/v10/yolov10x.yaml +40 -0
  22. doclayout_yolo/cfg/models/v3/yolov3-spp.yaml +46 -0
  23. doclayout_yolo/cfg/models/v3/yolov3-tiny.yaml +37 -0
  24. doclayout_yolo/cfg/models/v3/yolov3.yaml +46 -0
  25. doclayout_yolo/cfg/models/v5/yolov5-p6.yaml +59 -0
  26. doclayout_yolo/cfg/models/v5/yolov5.yaml +48 -0
  27. doclayout_yolo/cfg/models/v6/yolov6.yaml +53 -0
  28. doclayout_yolo/cfg/models/v8/yolov8-cls-resnet101.yaml +25 -0
  29. doclayout_yolo/cfg/models/v8/yolov8-cls-resnet50.yaml +25 -0
  30. doclayout_yolo/cfg/models/v8/yolov8-cls.yaml +29 -0
  31. doclayout_yolo/cfg/models/v8/yolov8-ghost-p2.yaml +54 -0
  32. doclayout_yolo/cfg/models/v8/yolov8-ghost-p6.yaml +56 -0
  33. doclayout_yolo/cfg/models/v8/yolov8-ghost.yaml +47 -0
  34. doclayout_yolo/cfg/models/v8/yolov8-obb.yaml +46 -0
  35. doclayout_yolo/cfg/models/v8/yolov8-p2.yaml +54 -0
  36. doclayout_yolo/cfg/models/v8/yolov8-p6.yaml +56 -0
  37. doclayout_yolo/cfg/models/v8/yolov8-pose-p6.yaml +57 -0
  38. doclayout_yolo/cfg/models/v8/yolov8-pose.yaml +47 -0
  39. doclayout_yolo/cfg/models/v8/yolov8-rtdetr.yaml +46 -0
  40. doclayout_yolo/cfg/models/v8/yolov8-seg-p6.yaml +56 -0
  41. doclayout_yolo/cfg/models/v8/yolov8-seg.yaml +46 -0
  42. doclayout_yolo/cfg/models/v8/yolov8-world.yaml +48 -0
  43. doclayout_yolo/cfg/models/v8/yolov8-worldv2.yaml +46 -0
  44. doclayout_yolo/cfg/models/v8/yolov8.yaml +46 -0
  45. doclayout_yolo/cfg/models/v9/yolov9c.yaml +36 -0
  46. doclayout_yolo/cfg/models/v9/yolov9e.yaml +60 -0
  47. doclayout_yolo/cfg/trackers/botsort.yaml +18 -0
  48. doclayout_yolo/cfg/trackers/bytetrack.yaml +11 -0
  49. doclayout_yolo/data/__init__.py +15 -0
  50. doclayout_yolo/data/annotator.py +50 -0
doclayout_yolo/.DS_Store ADDED
Binary file (6.15 kB). View file
 
doclayout_yolo/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
+ # Package initializer for doclayout_yolo: defines the version and re-exports the public API.
+
+ __version__ = "0.0.2"
+
+ # Model classes and helpers re-exported at package top level.
+ from doclayout_yolo.data.explorer.explorer import Explorer
+ from doclayout_yolo.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
+ from doclayout_yolo.models.fastsam import FastSAM
+ from doclayout_yolo.models.nas import NAS
+ from doclayout_yolo.utils import ASSETS, SETTINGS as settings
+ from doclayout_yolo.utils.checks import check_yolo as checks
+ from doclayout_yolo.utils.downloads import download
+
+ # Explicit public API: names importable via `from doclayout_yolo import *`.
+ # NOTE(review): YOLOWorld is imported above but not listed here — confirm whether
+ # it is intentionally excluded from the public API.
+ __all__ = (
+     "__version__",
+     "ASSETS",
+     "YOLO",
+     "YOLOWorld",
+     "NAS",
+     "SAM",
+     "FastSAM",
+     "RTDETR",
+     "checks",
+     "download",
+     "settings",
+     "Explorer",
+     "YOLOv10"
+ )
doclayout_yolo/assets/bus.jpg ADDED

Git LFS Details

  • SHA256: c02019c4979c191eb739ddd944445ef408dad5679acab6fd520ef9d434bfbc63
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
doclayout_yolo/assets/zidane.jpg ADDED
doclayout_yolo/cfg/__init__.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Module-level constants and imports for the doclayout_yolo CLI/configuration system.

import pdb  # NOTE(review): unused debug import left by the author; consider removing

import contextlib
import shutil
import subprocess
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Dict, List, Union

from doclayout_yolo.utils import (
    ASSETS,
    DEFAULT_CFG,
    DEFAULT_CFG_DICT,
    DEFAULT_CFG_PATH,
    LOGGER,
    RANK,
    ROOT,
    RUNS_DIR,
    SETTINGS,
    SETTINGS_YAML,
    TESTS_RUNNING,
    IterableSimpleNamespace,
    __version__,
    checks,
    colorstr,
    deprecation_warn,
    yaml_load,
    yaml_print,
)

# Define valid tasks and modes
MODES = {"train", "val", "predict", "export", "track", "benchmark"}
TASKS = {"detect", "segment", "classify", "pose", "obb"}

# Default dataset used per task when the CLI omits 'data='.
TASK2DATA = {
    "detect": "coco8.yaml",
    "segment": "coco8-seg.yaml",
    "classify": "imagenet10",
    "pose": "coco8-pose.yaml",
    "obb": "dota8.yaml",
}
# Default pretrained weights used per task when the CLI omits 'model='.
TASK2MODEL = {
    "detect": "yolov8n.pt",
    "segment": "yolov8n-seg.pt",
    "classify": "yolov8n-cls.pt",
    "pose": "yolov8n-pose.pt",
    "obb": "yolov8n-obb.pt",
}
# Headline metric key reported per task.
TASK2METRIC = {
    "detect": "metrics/mAP50-95(B)",
    "segment": "metrics/mAP50-95(M)",
    "classify": "metrics/accuracy_top1",
    "pose": "metrics/mAP50-95(P)",
    "obb": "metrics/mAP50-95(B)",
}

# NOTE: list renumbered — the original message went 1,2,3,4,6,5.
CLI_HELP_MSG = f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {TASKS}
                MODE (required) is one of {MODES}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.doclayout_yolo.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
        yolo explorer

    6. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.doclayout_yolo.com
    Community: https://community.doclayout_yolo.com
    GitHub: https://github.com/doclayout_yolo/doclayout_yolo
    """

# Define keys for arg type checks
CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
# Fraction floats constrained to 0.0 - 1.0.  (Duplicate "iou" entry removed.)
CFG_FRACTION_KEYS = {
    "dropout",
    "iou",
    "lr0",
    "lrf",
    "momentum",
    "weight_decay",
    "warmup_momentum",
    "warmup_bias_lr",
    "label_smoothing",
    "hsv_h",
    "hsv_s",
    "hsv_v",
    "translate",
    "scale",
    "perspective",
    "flipud",
    "fliplr",
    "bgr",
    "mosaic",
    "mixup",
    "copy_paste",
    "conf",
    "fraction",
}  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = {
    "epochs",
    "patience",
    "batch",
    "workers",
    "seed",
    "close_mosaic",
    "mask_ratio",
    "max_det",
    "vid_stride",
    "line_width",
    "workspace",
    "nbs",
    "save_period",
}
CFG_BOOL_KEYS = {
    "save",
    "exist_ok",
    "verbose",
    "deterministic",
    "single_cls",
    "rect",
    "cos_lr",
    "overlap_mask",
    "val",
    "save_json",
    "save_hybrid",
    "half",
    "dnn",
    "plots",
    "show",
    "save_txt",
    "save_conf",
    "save_crop",
    "save_frames",
    "show_labels",
    "show_conf",
    "visualize",
    "augment",
    "agnostic_nms",
    "retina_masks",
    "show_boxes",
    "keras",
    "optimize",
    "int8",
    "dynamic",
    "simplify",
    "nms",
    "profile",
    "multi_scale",
    "fuse",
}
177
+
178
+
179
def cfg2dict(cfg):
    """
    Normalize a configuration object into a plain dictionary.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration source; file paths/strings
            are loaded as YAML, namespaces are converted via vars(), dicts pass through.

    Returns:
        (dict): Configuration in dictionary form.
    """
    if isinstance(cfg, SimpleNamespace):
        return vars(cfg)  # namespace -> dict
    if isinstance(cfg, (str, Path)):
        return yaml_load(cfg)  # YAML file -> dict
    return cfg  # already a dict (or dict-like); returned unchanged
194
+
195
+
196
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load configuration data and merge optional overrides into a namespace.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Base configuration data.
        overrides (str | Dict | optional): Overrides as a file name or dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace (override values win over base values).
    """
    cfg = cfg2dict(cfg)

    # Merge overrides on top of the base configuration
    if overrides:
        overrides = cfg2dict(overrides)
        if "save_dir" not in cfg:
            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # overrides take precedence

    # Numeric 'project'/'name' values must be strings for path construction
    for key in ("project", "name"):
        if key in cfg and isinstance(cfg[key], (int, float)):
            cfg[key] = str(cfg[key])
    if cfg.get("name") == "model":  # assign model to 'name' arg
        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and value checks
    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)
230
+
231
+
232
def check_cfg(cfg, hard=True):
    """
    Validate Ultralytics configuration argument types and values.

    With hard=True an invalid type raises; with hard=False the value is coerced
    in place instead. Fraction keys additionally raise ValueError outside [0, 1].
    """
    for key, value in cfg.items():
        if value is None:  # None values may be from optional args
            continue
        if key in CFG_FLOAT_KEYS:
            if not isinstance(value, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{key}={value}' is of invalid type {type(value).__name__}. "
                        f"Valid '{key}' types are int (i.e. '{key}=0') or float (i.e. '{key}=0.5')"
                    )
                cfg[key] = float(value)
        elif key in CFG_FRACTION_KEYS:
            if not isinstance(value, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{key}={value}' is of invalid type {type(value).__name__}. "
                        f"Valid '{key}' types are int (i.e. '{key}=0') or float (i.e. '{key}=0.5')"
                    )
                cfg[key] = value = float(value)
            # Range check applies regardless of 'hard'
            if not (0.0 <= value <= 1.0):
                raise ValueError(f"'{key}={value}' is an invalid value. Valid '{key}' values are between 0.0 and 1.0.")
        elif key in CFG_INT_KEYS and not isinstance(value, int):
            if hard:
                raise TypeError(
                    f"'{key}={value}' is of invalid type {type(value).__name__}. '{key}' must be an int (i.e. '{key}=8')"
                )
            cfg[key] = int(value)
        elif key in CFG_BOOL_KEYS and not isinstance(value, bool):
            if hard:
                raise TypeError(
                    f"'{key}={value}' is of invalid type {type(value).__name__}. "
                    f"'{key}' must be a bool (i.e. '{key}=True' or '{key}=False')"
                )
            cfg[key] = bool(value)
266
+
267
+
268
def get_save_dir(args, name=None):
    """Return the save directory derived from train/val/predict arguments (args.save_dir wins if set)."""
    explicit = getattr(args, "save_dir", None)
    if explicit:
        return Path(explicit)

    from doclayout_yolo.utils.files import increment_path

    project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
    run_name = name or args.name or f"{args.mode}"
    # Non-zero DDP ranks must reuse the directory chosen by rank 0, hence exist_ok=True there.
    return Path(increment_path(Path(project) / run_name, exist_ok=args.exist_ok if RANK in (-1, 0) else True))
281
+
282
+
283
+ def _handle_deprecation(custom):
284
+ """Hardcoded function to handle deprecated config keys."""
285
+
286
+ for key in custom.copy().keys():
287
+ if key == "boxes":
288
+ deprecation_warn(key, "show_boxes")
289
+ custom["show_boxes"] = custom.pop("boxes")
290
+ if key == "hide_labels":
291
+ deprecation_warn(key, "show_labels")
292
+ custom["show_labels"] = custom.pop("hide_labels") == "False"
293
+ if key == "hide_conf":
294
+ deprecation_warn(key, "show_conf")
295
+ custom["show_conf"] = custom.pop("hide_conf") == "False"
296
+ if key == "line_thickness":
297
+ deprecation_warn(key, "line_width")
298
+ custom["line_width"] = custom.pop("line_thickness")
299
+
300
+ return custom
301
+
302
+
303
def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    Verify that every key in *custom* exists in *base*.

    Args:
        base (dict): a dictionary of base configuration options
        custom (dict): a dictionary of custom configuration options
        e (Error, optional): An optional error that is passed by the calling function.

    Raises:
        SyntaxError: if any key is unknown, with close-match suggestions and the CLI help text.
    """
    custom = _handle_deprecation(custom)
    base_keys = set(base.keys())
    unknown = [k for k in set(custom.keys()) if k not in base_keys]
    if not unknown:
        return
    from difflib import get_close_matches

    message = ""
    for bad in unknown:
        candidates = get_close_matches(bad, base_keys)  # key list
        candidates = [f"{k}={base[k]}" if base.get(k) is not None else k for k in candidates]
        hint = f"Similar arguments are i.e. {candidates}." if candidates else ""
        message += f"'{colorstr('red', 'bold', bad)}' is not a valid YOLO argument. {hint}\n"
    raise SyntaxError(message + CLI_HELP_MSG) from e
326
+
327
+
328
def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        (List[str]): A list of strings where the arguments around isolated '=' are merged.

    Note:
        This function deletes consumed elements from *args* in place while enumerating it;
        callers that hold a reference to the same list (e.g. closures over 'args') will
        observe the mutation. The deletion also shifts upcoming indices — intentional here,
        since the consumed value must not be revisited.
    """
    new_args = []
    for i, arg in enumerate(args):
        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
    return new_args
352
+
353
+
354
def handle_yolo_hub(args: List[str]) -> None:
    """
    Process Ultralytics HUB authentication commands from the CLI.

    Args:
        args (List[str]): Command line arguments; args[0] selects 'login' or 'logout',
            and for 'login' args[1] optionally supplies the API key.

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from doclayout_yolo import hub  # deferred import keeps module import cheap

    action = args[0]
    if action == "login":
        # Log in to Ultralytics HUB using the provided API key (empty string if omitted)
        hub.login(args[1] if len(args) > 1 else "")
    elif action == "logout":
        # Log out from Ultralytics HUB
        hub.logout()
378
+
379
+
380
def handle_yolo_settings(args: List[str]) -> None:
    """
    Process YOLO settings commands from the CLI: 'reset' or 'key=value' updates.

    Args:
        args (List[str]): Command line arguments for settings management; empty just prints them.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = "https://docs.doclayout_yolo.com/quickstart/#doclayout_yolo-settings"  # help URL
    try:
        if any(args):
            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:
                # Treat every argument as a 'key=value' pair and persist it
                updates = dict(parse_key_value_pair(kv) for kv in args)
                check_dict_alignment(SETTINGS, updates)
                SETTINGS.update(updates)

        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        # Best-effort: settings errors are reported, never fatal
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
411
+
412
+
413
def handle_explorer():
    """Open the Ultralytics Explorer GUI (Streamlit dashboard bundled with the package)."""
    checks.check_requirements("streamlit")  # ensure the GUI dependency is installed before launching
    LOGGER.info("💡 Loading Explorer dashboard...")
    # Blocks until the Streamlit process exits; maxMessageSize is raised so large
    # datasets can be streamed to the browser.
    subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
418
+
419
+
420
def parse_key_value_pair(pair):
    """Split one 'key=value' string on its first '=' and return the key with its parsed value."""
    key, value = pair.split("=", 1)  # ValueError if no '=' — caught by the CLI entrypoint
    key = key.strip()
    value = value.strip()
    # Empty values are rejected; the AssertionError is handled upstream in entrypoint()
    assert value, f"missing '{key}' value"
    return key, smart_value(value)
426
+
427
+
428
def smart_value(v):
    """Convert a string to its underlying Python value (None, bool, int, float, ...) when possible."""
    lowered = v.lower()
    if lowered == "none":
        return None
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    # SECURITY NOTE: eval() executes arbitrary expressions. Inputs here come from the
    # local CLI, but ast.literal_eval would be the safer choice for untrusted input.
    with contextlib.suppress(Exception):
        return eval(v)
    return v  # not parseable — keep as the original string
441
+
442
+
443
def entrypoint(debug=""):
    """
    This function is the doclayout_yolo package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg
    """
    # 'debug' simulates a CLI invocation; otherwise the real process argv is used.
    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    # Special zero-config commands; the lambdas close over 'args', which
    # merge_equals_args() below may mutate in place before they are called.
    special = {
        "help": lambda: LOGGER.info(CLI_HELP_MSG),
        "checks": checks.collect_system_info,
        "version": lambda: LOGGER.info(__version__),
        "settings": lambda: handle_yolo_settings(args[1:]),
        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
        "hub": lambda: handle_yolo_hub(args[1:]),
        "login": lambda: handle_yolo_hub(args),
        "copy-cfg": copy_default_cfg,
        "explorer": lambda: handle_explorer(),
    }
    # Superset of every name the CLI accepts, used for typo suggestions.
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # first-letter shorthand, e.g. 'h' for 'help'
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular, e.g. 'check'
    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        if a.startswith("--"):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(","):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == "cfg" and v is not None:  # custom.yaml passed
                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
                    # NOTE: replaces (not merges) any overrides collected so far
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                # Malformed pair — surface it with close-match suggestions
                check_dict_alignment(full_args_dict, {a: ""}, e)

        elif a in TASKS:
            overrides["task"] = a
        elif a in MODES:
            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(
                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
            )
        else:
            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
    mode = overrides.get("mode")
    if mode is None:
        mode = DEFAULT_CFG.mode or "predict"
        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if "model" not in overrides:
            overrides["model"] = TASK2MODEL[task]

    # Model — architecture guessed from the model name substring.
    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
        model = "yolov8n.pt"
        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
    overrides["model"] = model
    # stem = Path(model).stem.lower()
    stem = model.lower()
    if "rtdetr" in stem:  # guess architecture
        from doclayout_yolo import RTDETR

        model = RTDETR(model)  # no task argument
    elif "fastsam" in stem:  # must precede the plain "sam" check below
        from doclayout_yolo import FastSAM

        model = FastSAM(model)
    elif "sam" in stem:
        from doclayout_yolo import SAM

        model = SAM(model)
    elif "yolov10" in stem:
        from doclayout_yolo import YOLOv10

        model = YOLOv10(model)
    else:
        from doclayout_yolo import YOLO

        model = YOLO(model, task=task)
    if isinstance(overrides.get("pretrained"), str):
        model.load(overrides["pretrained"])

    # Task Update — the model's own task always wins over a conflicting CLI task.
    if task != model.task:
        if task:
            LOGGER.warning(
                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
            )
        task = model.task

    # Mode — fill in mode-specific required arguments with sensible defaults.
    if mode in ("predict", "track") and "source" not in overrides:
        overrides["source"] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
    elif mode in ("train", "val"):
        if "data" not in overrides and "resume" not in overrides:
            overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
    elif mode == "export":
        if "format" not in overrides:
            overrides["format"] = DEFAULT_CFG.format or "torchscript"
            LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f"💡 Learn more at https://docs.doclayout_yolo.com/modes/{mode}")
594
+
595
+
596
+ # Special modes --------------------------------------------------------------------------------------------------------
597
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    destination = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
    shutil.copy2(DEFAULT_CFG_PATH, destination)  # preserves file metadata
    LOGGER.info(
        f"{DEFAULT_CFG_PATH} copied to {destination}\n"
        f"Example YOLO command with this new custom cfg:\n yolo cfg='{destination}' imgsz=320 batch=8"
    )
605
+
606
+
607
if __name__ == "__main__":
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    # Running this module directly invokes the CLI with the real sys.argv.
    entrypoint(debug="")
doclayout_yolo/cfg/datasets/d4la.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # D4LA document layout analysis dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ # path: /mnt/hwfile/opendatalab/zhaozhiyuan/yolov10/D4LA # dataset root dir
12
+ path: ./layout_data/D4LA
13
+ train: train.txt # train images (relative to 'path')
+ val: test.txt # val images (relative to 'path'); D4LA reuses the test split for validation
+ test: test.txt # test images (relative to 'path')
16
+
17
+ # Classes
18
+ names:
19
+ 0: "DocTitle"
20
+ 1: "ParaTitle"
21
+ 2: "ParaText"
22
+ 3: "ListText"
23
+ 4: "RegionTitle"
24
+ 5: "Date"
25
+ 6: "LetterHead"
26
+ 7: "LetterDear"
27
+ 8: "LetterSign"
28
+ 9: "Question"
29
+ 10: "OtherText"
30
+ 11: "RegionKV"
31
+ 12: "RegionList"
32
+ 13: "Abstract"
33
+ 14: "Author"
34
+ 15: "TableName"
35
+ 16: "Table"
36
+ 17: "Figure"
37
+ 18: "FigureName"
38
+ 19: "Equation"
39
+ 20: "Reference"
40
+ 21: "Footer"
41
+ 22: "PageHeader"
42
+ 23: "PageFooter"
43
+ 24: "Number"
44
+ 25: "Catalog"
45
+ 26: "PageNumber"
doclayout_yolo/cfg/datasets/doclaynet.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DocLayNet document layout analysis dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ./layout_data/doclaynet # dataset root dir
12
+ train: train.txt # train images (relative to 'path')
+ val: val.txt # val images (relative to 'path')
+ test: val.txt # test images (relative to 'path'); DocLayNet reuses the val split for testing
15
+
16
+ # Classes
17
+ names:
18
+ 0: Caption
19
+ 1: Footnote
20
+ 2: Formula
21
+ 3: List-item
22
+ 4: Page-footer
23
+ 5: Page-header
24
+ 6: Picture
25
+ 7: Section-header
26
+ 8: Table
27
+ 9: Text
28
+ 10: Title
doclayout_yolo/cfg/datasets/docsynth300k.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DocSynth300K synthetic document layout pretraining dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ./layout_data/docsynth300k # dataset root dir
12
+ train: train300k.txt # train images (relative to 'path')
+ val: val.txt # val images (relative to 'path')
+ test: val.txt # test images (relative to 'path'); the val split is reused for testing
15
+
16
+ # Classes
17
+ names:
18
+ 0: 'QR code'
19
+ 1: 'advertisement'
20
+ 2: 'algorithm'
21
+ 3: 'answer'
22
+ 4: 'author'
23
+ 5: 'barcode'
24
+ 6: 'bill'
25
+ 7: 'blank'
26
+ 8: 'bracket'
27
+ 9: 'breakout'
28
+ 10: 'byline'
29
+ 11: 'caption'
30
+ 12: 'catalogue'
31
+ 13: 'chapter title'
32
+ 14: 'code'
33
+ 15: 'correction'
34
+ 16: 'credit'
35
+ 17: 'dateline'
36
+ 18: 'drop cap'
37
+ 19: "editor's note"
38
+ 20: 'endnote'
39
+ 21: 'examinee information'
40
+ 22: 'fifth-level title'
41
+ 23: 'figure'
42
+ 24: 'first-level question number'
43
+ 25: 'first-level title'
44
+ 26: 'flag'
45
+ 27: 'folio'
46
+ 28: 'footer'
47
+ 29: 'footnote'
48
+ 30: 'formula'
49
+ 31: 'fourth-level section title'
50
+ 32: 'fourth-level title'
51
+ 33: 'header'
52
+ 34: 'headline'
53
+ 35: 'index'
54
+ 36: 'inside'
55
+ 37: 'institute'
56
+ 38: 'jump line'
57
+ 39: 'kicker'
58
+ 40: 'lead'
59
+ 41: 'marginal note'
60
+ 42: 'matching'
61
+ 43: 'mugshot'
62
+ 44: 'option'
63
+ 45: 'ordered list'
64
+ 46: 'other question number'
65
+ 47: 'page number'
66
+ 48: 'paragraph'
67
+ 49: 'part'
68
+ 50: 'play'
69
+ 51: 'poem'
70
+ 52: 'reference'
71
+ 53: 'sealing line'
72
+ 54: 'second-level question number'
73
+ 55: 'second-level title'
74
+ 56: 'section'
75
+ 57: 'section title'
76
+ 58: 'sidebar'
77
+ 59: 'sub section title'
78
+ 60: 'subhead'
79
+ 61: 'subsub section title'
80
+ 62: 'supplementary note'
81
+ 63: 'table'
82
+ 64: 'table caption'
83
+ 65: 'table note'
84
+ 66: 'teasers'
85
+ 67: 'third-level question number'
86
+ 68: 'third-level title'
87
+ 69: 'title'
88
+ 70: 'translator'
89
+ 71: 'underscore'
90
+ 72: 'unordered list'
91
+ 73: 'weather forecast'
doclayout_yolo/cfg/default.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default training settings and hyperparameters for medium-augmentation COCO training
3
+
4
+ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
5
+ mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
6
+
7
+ # Train settings -------------------------------------------------------------------------------------------------------
8
+ model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
9
+ data: # (str, optional) path to data file, i.e. coco128.yaml
10
+ epochs: 100 # (int) number of epochs to train for
11
+ time: # (float, optional) number of hours to train for, overrides epochs if supplied
12
+ patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
13
+ batch: 16 # (int) number of images per batch (-1 for AutoBatch)
14
+ imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
15
+ save: True # (bool) save train checkpoints and predict results
16
+ save_period: 10 # (int) Save checkpoint every x epochs (disabled if < 1)
17
+ val_period: 1 # (int) Validation every x epochs
18
+ cache: False # (bool) True/ram, disk or False. Use cache for data loading
19
+ device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
20
+ workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
21
+ project: # (str, optional) project name
22
+ name: # (str, optional) experiment name, results saved to 'project/name' directory
23
+ exist_ok: True # (bool) whether to overwrite existing experiment
24
+ pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
25
+ optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
26
+ verbose: True # (bool) whether to print verbose output
27
+ seed: 0 # (int) random seed for reproducibility
28
+ deterministic: True # (bool) whether to enable deterministic mode
29
+ single_cls: False # (bool) train multi-class data as single-class
30
+ rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
31
+ cos_lr: False # (bool) use cosine learning rate scheduler
32
+ close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
33
+ resume: False # (bool) resume training from last checkpoint
34
+ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
35
+ fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
36
+ profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
37
+ freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
38
+ multi_scale: False # (bool) Whether to use multiscale during training
39
+ # Segmentation
40
+ overlap_mask: True # (bool) masks should overlap during training (segment train only)
41
+ mask_ratio: 4 # (int) mask downsample ratio (segment train only)
42
+ # Classification
43
+ dropout: 0.0 # (float) use dropout regularization (classify train only)
44
+
45
+ # Val/Test settings ----------------------------------------------------------------------------------------------------
46
+ val: True # (bool) validate/test during training
47
+ split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
48
+ save_json: False # (bool) save results to JSON file
49
+ save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
50
+ conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
51
+ iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
52
+ max_det: 300 # (int) maximum number of detections per image
53
+ half: False # (bool) use half precision (FP16)
54
+ dnn: False # (bool) use OpenCV DNN for ONNX inference
55
+ plots: True # (bool) save plots and images during train/val
56
+
57
+ # Predict settings -----------------------------------------------------------------------------------------------------
58
+ source: # (str, optional) source directory for images or videos
59
+ vid_stride: 1 # (int) video frame-rate stride
60
+ stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
61
+ visualize: False # (bool) visualize model features
62
+ augment: False # (bool) apply image augmentation to prediction sources
63
+ agnostic_nms: False # (bool) class-agnostic NMS
64
+ classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
65
+ retina_masks: False # (bool) use high-resolution segmentation masks
66
+ embed: # (list[int], optional) return feature vectors/embeddings from given layers
67
+
68
+ # Visualize settings ---------------------------------------------------------------------------------------------------
69
+ show: False # (bool) show predicted images and videos if environment allows
70
+ save_frames: False # (bool) save predicted individual video frames
71
+ save_txt: False # (bool) save results as .txt file
72
+ save_conf: False # (bool) save results with confidence scores
73
+ save_crop: False # (bool) save cropped images with results
74
+ show_labels: True # (bool) show prediction labels, i.e. 'person'
75
+ show_conf: True # (bool) show prediction confidence, i.e. '0.99'
76
+ show_boxes: True # (bool) show prediction boxes
77
+ line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
78
+
79
+ # Export settings ------------------------------------------------------------------------------------------------------
80
+ format: torchscript # (str) format to export to, choices at https://docs.doclayout_yolo.com/modes/export/#export-formats
81
+ keras: False # (bool) use Keras
82
+ optimize: False # (bool) TorchScript: optimize for mobile
83
+ int8: False # (bool) CoreML/TF INT8 quantization
84
+ dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
85
+ simplify: False # (bool) ONNX: simplify model
86
+ opset: # (int, optional) ONNX: opset version
87
+ workspace: 4 # (int) TensorRT: workspace size (GB)
88
+ nms: False # (bool) CoreML: add NMS
89
+
90
+ # Hyperparameters ------------------------------------------------------------------------------------------------------
91
+ lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
92
+ lrf: 0.01 # (float) final learning rate (lr0 * lrf)
93
+ momentum: 0.937 # (float) SGD momentum/Adam beta1
94
+ weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
95
+ warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
96
+ warmup_momentum: 0.8 # (float) warmup initial momentum
97
+ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
98
+ box: 7.5 # (float) box loss gain
99
+ cls: 0.5 # (float) cls loss gain (scale with pixels)
100
+ dfl: 1.5 # (float) dfl loss gain
101
+ pose: 12.0 # (float) pose loss gain
102
+ kobj: 1.0 # (float) keypoint obj loss gain
103
+ label_smoothing: 0.0 # (float) label smoothing (fraction)
104
+ nbs: 64 # (int) nominal batch size
105
+ hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
106
+ hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
107
+ hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
108
+ degrees: 0.0 # (float) image rotation (+/- deg)
109
+ translate: 0.1 # (float) image translation (+/- fraction)
110
+ scale: 0.5 # (float) image scale (+/- gain)
111
+ shear: 0.0 # (float) image shear (+/- deg)
112
+ perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
113
+ flipud: 0.0 # (float) image flip up-down (probability)
114
+ fliplr: 0.5 # (float) image flip left-right (probability)
115
+ bgr: 0.0 # (float) image channel BGR (probability)
116
+ mosaic: 1.0 # (float) image mosaic (probability)
117
+ mixup: 0.0 # (float) image mixup (probability)
118
+ copy_paste: 0.0 # (float) segment copy-paste (probability)
119
+ auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
120
+ erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
121
+ crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)
122
+
123
+ # Custom config.yaml ---------------------------------------------------------------------------------------------------
124
+ cfg: # (str, optional) for overriding defaults.yaml
125
+
126
+ # Tracker settings ------------------------------------------------------------------------------------------------------
127
+ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
doclayout_yolo/cfg/models/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Models
2
+
3
+ Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.
4
+
5
+ These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.
6
+
7
+ To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.doclayout_yolo.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
8
+
9
+ ### Usage
10
+
11
+ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
12
+
13
+ ```bash
14
+ yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
15
+ ```
16
+
17
+ They may also be used directly in a Python environment, and accept the same [arguments](https://docs.doclayout_yolo.com/usage/cfg/) as in the CLI example above:
18
+
19
+ ```python
20
+ from doclayout_yolo import YOLO
21
+
22
+ model = YOLO("model.yaml") # build a YOLOv8n model from scratch
23
+ # YOLO("model.pt") use pre-trained model if available
24
+ model.info() # display model information
25
+ model.train(data="coco128.yaml", epochs=100) # train the model
26
+ ```
27
+
28
+ ## Pre-trained Model Architectures
29
+
30
+ Ultralytics supports many model architectures. Visit https://docs.doclayout_yolo.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.
31
+
32
+ ## Contribute New Models
33
+
34
+ Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.
35
+
36
+ By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.
37
+
38
+ To get started, please consult our [Contributing Guide](https://docs.doclayout_yolo.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!
39
+
40
+ Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
doclayout_yolo/cfg/models/rt-detr/rtdetr-l.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 48]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [48, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [96, 512, 3]] # stage 2
17
+
18
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
19
+ - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
20
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
21
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
22
+
23
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
24
+ - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
25
+
26
+ head:
27
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
28
+ - [-1, 1, AIFI, [1024, 8]]
29
+ - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
33
+ - [[-2, -1], 1, Concat, [1]]
34
+ - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
35
+ - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
38
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
39
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
41
+
42
+ - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
43
+ - [[-1, 17], 1, Concat, [1]] # cat Y4
44
+ - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
47
+ - [[-1, 12], 1, Concat, [1]] # cat Y5
48
+ - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
49
+
50
+ - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet101.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet101 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet50.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet50 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-x.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ x: [1.00, 1.00, 2048]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 64]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [64, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [128, 512, 3]]
17
+ - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
18
+
19
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
20
+ - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
21
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
22
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
23
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
24
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
25
+
26
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
27
+ - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
28
+ - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
29
+
30
+ head:
31
+ - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
32
+ - [-1, 1, AIFI, [2048, 8]]
33
+ - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
37
+ - [[-2, -1], 1, Concat, [1]]
38
+ - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
39
+ - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
43
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
44
+ - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
45
+
46
+ - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
47
+ - [[-1, 21], 1, Concat, [1]] # cat Y4
48
+ - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
49
+
50
+ - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
51
+ - [[-1, 16], 1, Concat, [1]] # cat Y5
52
+ - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
53
+
54
+ - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10b.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ b: [0.67, 1.00, 512]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10l.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10m-doclayout.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
9
+
10
+ # YOLOv8.0n backbone
11
+ backbone:
12
+ # [from, repeats, module, args]
13
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
14
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
15
+ - [-1, 3, C2f, [128, True]]
16
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
17
+ - [-1, 6, G2L_CRM, [256, True, True, [1,2,3], 5, "glu"]]
18
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
19
+ - [-1, 6, G2L_CRM, [512, True, True, [1,3,5], 3, "glu"]]
20
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
21
+ - [-1, 6, G2L_CRM, [1024, True, False]]
22
+ - [-1, 1, SPPF, [1024, 5]] # 9
23
+ - [-1, 1, PSA, [1024]] # 10
24
+
25
+ # YOLOv8.0n head
26
+ head:
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
29
+ - [-1, 3, C2f, [512]] # 13
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
33
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
34
+
35
+ - [-1, 1, Conv, [256, 3, 2]]
36
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
37
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
38
+
39
+ - [-1, 1, SCDown, [512, 3, 2]]
40
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
41
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
42
+
43
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10m.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
9
+
10
+ # YOLOv8.0n backbone
11
+ backbone:
12
+ # [from, repeats, module, args]
13
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
14
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
15
+ - [-1, 3, C2f, [128, True]]
16
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
17
+ - [-1, 6, C2f, [256, True]]
18
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
19
+ - [-1, 6, C2f, [512, True]]
20
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
21
+ - [-1, 3, C2fCIB, [1024, True]]
22
+ - [-1, 1, SPPF, [1024, 5]] # 9
23
+ - [-1, 1, PSA, [1024]] # 10
24
+
25
+ # YOLOv8.0n head
26
+ head:
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
29
+ - [-1, 3, C2f, [512]] # 13
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
33
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
34
+
35
+ - [-1, 1, Conv, [256, 3, 2]]
36
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
37
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
38
+
39
+ - [-1, 1, SCDown, [512, 3, 2]]
40
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
41
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
42
+
43
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10n.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ n: [0.33, 0.25, 1024]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2f, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2f, [512]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10s.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ s: [0.33, 0.50, 1024]
6
+
7
+ backbone:
8
+ # [from, repeats, module, args]
9
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
10
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
11
+ - [-1, 3, C2f, [128, True]]
12
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
13
+ - [-1, 6, C2f, [256, True]]
14
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
15
+ - [-1, 6, C2f, [512, True]]
16
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
17
+ - [-1, 3, C2fCIB, [1024, True, True]]
18
+ - [-1, 1, SPPF, [1024, 5]] # 9
19
+ - [-1, 1, PSA, [1024]] # 10
20
+
21
+ # YOLOv8.0n head
22
+ head:
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
25
+ - [-1, 3, C2f, [512]] # 13
26
+
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
29
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
30
+
31
+ - [-1, 1, Conv, [256, 3, 2]]
32
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
33
+ - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
34
+
35
+ - [-1, 1, SCDown, [512, 3, 2]]
36
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
37
+ - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
38
+
39
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10x.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ x: [1.00, 1.25, 512]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2fCIB, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v3/yolov3-spp.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [32, 3, 1]] # 0
13
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
14
+ - [-1, 1, Bottleneck, [64]]
15
+ - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
16
+ - [-1, 2, Bottleneck, [128]]
17
+ - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
18
+ - [-1, 8, Bottleneck, [256]]
19
+ - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
20
+ - [-1, 8, Bottleneck, [512]]
21
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
22
+ - [-1, 4, Bottleneck, [1024]] # 10
23
+
24
+ # YOLOv3-SPP head
25
+ head:
26
+ - [-1, 1, Bottleneck, [1024, False]]
27
+ - [-1, 1, SPP, [512, [5, 9, 13]]]
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [512, 1, 1]]
30
+ - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [256, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Bottleneck, [512, False]]
36
+ - [-1, 1, Bottleneck, [512, False]]
37
+ - [-1, 1, Conv, [256, 1, 1]]
38
+ - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
39
+
40
+ - [-2, 1, Conv, [128, 1, 1]]
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 1, Bottleneck, [256, False]]
44
+ - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
45
+
46
+ - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v3/yolov3-tiny.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # YOLOv3-tiny backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [16, 3, 1]] # 0
13
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
14
+ - [-1, 1, Conv, [32, 3, 1]]
15
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
16
+ - [-1, 1, Conv, [64, 3, 1]]
17
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
18
+ - [-1, 1, Conv, [128, 3, 1]]
19
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
20
+ - [-1, 1, Conv, [256, 3, 1]]
21
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
22
+ - [-1, 1, Conv, [512, 3, 1]]
23
+ - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
24
+ - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12
25
+
26
+ # YOLOv3-tiny head
27
+ head:
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [256, 1, 1]]
30
+ - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [128, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)
36
+
37
+ - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)
doclayout_yolo/cfg/models/v3/yolov3.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [32, 3, 1]] # 0
13
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
14
+ - [-1, 1, Bottleneck, [64]]
15
+ - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
16
+ - [-1, 2, Bottleneck, [128]]
17
+ - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
18
+ - [-1, 8, Bottleneck, [256]]
19
+ - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
20
+ - [-1, 8, Bottleneck, [512]]
21
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
22
+ - [-1, 4, Bottleneck, [1024]] # 10
23
+
24
+ # YOLOv3 head
25
+ head:
26
+ - [-1, 1, Bottleneck, [1024, False]]
27
+ - [-1, 1, Conv, [512, 1, 1]]
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [512, 1, 1]]
30
+ - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [256, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Bottleneck, [512, False]]
36
+ - [-1, 1, Bottleneck, [512, False]]
37
+ - [-1, 1, Conv, [256, 1, 1]]
38
+ - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
39
+
40
+ - [-2, 1, Conv, [128, 1, 1]]
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 1, Bottleneck, [256, False]]
44
+ - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
45
+
46
+ - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v5/yolov5-p6.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.doclayout_yolo.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3, [128]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3, [256]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 9, C3, [512]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3, [768]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3, [1024]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ - [-1, 1, Conv, [768, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C3, [768, False]] # 15
36
+
37
+ - [-1, 1, Conv, [512, 1, 1]]
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, C3, [512, False]] # 19
41
+
42
+ - [-1, 1, Conv, [256, 1, 1]]
43
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
44
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
45
+ - [-1, 3, C3, [256, False]] # 23 (P3/8-small)
46
+
47
+ - [-1, 1, Conv, [256, 3, 2]]
48
+ - [[-1, 20], 1, Concat, [1]] # cat head P4
49
+ - [-1, 3, C3, [512, False]] # 26 (P4/16-medium)
50
+
51
+ - [-1, 1, Conv, [512, 3, 2]]
52
+ - [[-1, 16], 1, Concat, [1]] # cat head P5
53
+ - [-1, 3, C3, [768, False]] # 29 (P5/32-large)
54
+
55
+ - [-1, 1, Conv, [768, 3, 2]]
56
+ - [[-1, 12], 1, Concat, [1]] # cat head P6
57
+ - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)
58
+
59
+ - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v5/yolov5.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3, [128]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3, [256]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 9, C3, [512]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3, [1024]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv5 v6.0 head
29
+ head:
30
+ - [-1, 1, Conv, [512, 1, 1]]
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3, [512, False]] # 13
34
+
35
+ - [-1, 1, Conv, [256, 1, 1]]
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
38
+ - [-1, 3, C3, [256, False]] # 17 (P3/8-small)
39
+
40
+ - [-1, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 14], 1, Concat, [1]] # cat head P4
42
+ - [-1, 3, C3, [512, False]] # 20 (P4/16-medium)
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
46
+ - [-1, 3, C3, [1024, False]] # 23 (P5/32-large)
47
+
48
+ - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v6/yolov6.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/models/yolov6
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ activation: nn.ReLU() # (optional) model default activation function
7
+ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv6-3.0s backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 6, Conv, [128, 3, 1]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 12, Conv, [256, 3, 1]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 18, Conv, [512, 3, 1]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 6, Conv, [1024, 3, 1]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv6-3.0s head
30
+ head:
31
+ - [-1, 1, Conv, [256, 1, 1]]
32
+ - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 1, Conv, [256, 3, 1]]
35
+ - [-1, 9, Conv, [256, 3, 1]] # 14
36
+
37
+ - [-1, 1, Conv, [128, 1, 1]]
38
+ - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
39
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
40
+ - [-1, 1, Conv, [128, 3, 1]]
41
+ - [-1, 9, Conv, [128, 3, 1]] # 19
42
+
43
+ - [-1, 1, Conv, [128, 3, 2]]
44
+ - [[-1, 15], 1, Concat, [1]] # cat head P4
45
+ - [-1, 1, Conv, [256, 3, 1]]
46
+ - [-1, 9, Conv, [256, 3, 1]] # 23
47
+
48
+ - [-1, 1, Conv, [256, 3, 2]]
49
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
50
+ - [-1, 1, Conv, [512, 3, 1]]
51
+ - [-1, 9, Conv, [512, 3, 1]] # 27
52
+
53
+ - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-cls-resnet101.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
18
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
19
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
20
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
21
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
22
+
23
+ # YOLOv8.0n head
24
+ head:
25
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-cls-resnet50.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
18
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
19
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
20
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
21
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
22
+
23
+ # YOLOv8.0n head
24
+ head:
25
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-cls.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+
27
+ # YOLOv8.0n head
28
+ head:
29
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-ghost-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-ghost-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C3Ghost, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, GhostConv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, GhostConv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, GhostConv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-ghost-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [768, True]]
26
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3Ghost, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0-ghost-p6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C3Ghost, [768]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C3Ghost, [512]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, GhostConv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, GhostConv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, GhostConv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-ghost.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+ # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
4
+
5
+ # Parameters
6
+ nc: 80 # number of classes
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
10
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
11
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
12
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
13
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs
14
+
15
+ # YOLOv8.0n-ghost backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C3Ghost, [128, True]]
21
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C3Ghost, [256, True]]
23
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C3Ghost, [512, True]]
25
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C3Ghost, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3Ghost, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, GhostConv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, GhostConv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-obb.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, Conv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C2f, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C2f, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, Conv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C2f, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-pose-p6.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0x6 backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [768, True]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
28
+ - [-1, 3, C2f, [1024, True]]
29
+ - [-1, 1, SPPF, [1024, 5]] # 11
30
+
31
+ # YOLOv8.0x6 head
32
+ head:
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C2, [768, False]] # 14
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
38
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
39
+ - [-1, 3, C2, [512, False]] # 17
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
44
+
45
+ - [-1, 1, Conv, [256, 3, 2]]
46
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
47
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
48
+
49
+ - [-1, 1, Conv, [512, 3, 2]]
50
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
51
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
52
+
53
+ - [-1, 1, Conv, [768, 3, 2]]
54
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
55
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
56
+
57
+ - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-pose.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0n backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C2f, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, Conv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, Conv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-rtdetr.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-seg-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Pose(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-seg.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-world.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
37
+
38
+ - [[15, 12, 9], 1, ImagePoolingAttn, [256]] # 16 (P3/8-small)
39
+
40
+ - [15, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
42
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 19 (P4/16-medium)
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
46
+ - [-1, 3, C2fAttn, [1024, 512, 16]] # 22 (P5/32-large)
47
+
48
+ - [[15, 19, 22], 1, WorldDetect, [nc, 512, False]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-worldv2.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
37
+
38
+ - [15, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2fAttn, [1024, 512, 16]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, WorldDetect, [nc, 512, True]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v9/yolov9c.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+
6
+ # gelan backbone
7
+ backbone:
8
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
9
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
10
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 2
11
+ - [-1, 1, ADown, [256]] # 3-P3/8
12
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 4
13
+ - [-1, 1, ADown, [512]] # 5-P4/16
14
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 6
15
+ - [-1, 1, ADown, [512]] # 7-P5/32
16
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 8
17
+ - [-1, 1, SPPELAN, [512, 256]] # 9
18
+
19
+ head:
20
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
21
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
22
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 12
23
+
24
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
26
+ - [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]] # 15 (P3/8-small)
27
+
28
+ - [-1, 1, ADown, [256]]
29
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
30
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 18 (P4/16-medium)
31
+
32
+ - [-1, 1, ADown, [512]]
33
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
34
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)
35
+
36
+ - [[15, 18, 21], 1, Detect, [nc]] # DDetect(P3, P4, P5)
doclayout_yolo/cfg/models/v9/yolov9e.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+
6
+ # gelan backbone
7
+ backbone:
8
+ - [-1, 1, Silence, []]
9
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
10
+ - [-1, 1, Conv, [128, 3, 2]] # 2-P2/4
11
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 3
12
+ - [-1, 1, ADown, [256]] # 4-P3/8
13
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 5
14
+ - [-1, 1, ADown, [512]] # 6-P4/16
15
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
16
+ - [-1, 1, ADown, [1024]] # 8-P5/32
17
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9
18
+
19
+ - [1, 1, CBLinear, [[64]]] # 10
20
+ - [3, 1, CBLinear, [[64, 128]]] # 11
21
+ - [5, 1, CBLinear, [[64, 128, 256]]] # 12
22
+ - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
23
+ - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14
24
+
25
+ - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
26
+ - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
27
+ - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
28
+ - [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]] # 18
29
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 19
30
+ - [-1, 1, ADown, [256]] # 20-P3/8
31
+ - [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]] # 21
32
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 22
33
+ - [-1, 1, ADown, [512]] # 23-P4/16
34
+ - [[13, 14, -1], 1, CBFuse, [[3, 3]]] # 24
35
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 25
36
+ - [-1, 1, ADown, [1024]] # 26-P5/32
37
+ - [[14, -1], 1, CBFuse, [[4]]] # 27
38
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 28
39
+ - [-1, 1, SPPELAN, [512, 256]] # 29
40
+
41
+ # gelan head
42
+ head:
43
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
44
+ - [[-1, 25], 1, Concat, [1]] # cat backbone P4
45
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 32
46
+
47
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
48
+ - [[-1, 22], 1, Concat, [1]] # cat backbone P3
49
+ - [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]] # 35 (P3/8-small)
50
+
51
+ - [-1, 1, ADown, [256]]
52
+ - [[-1, 32], 1, Concat, [1]] # cat head P4
53
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 38 (P4/16-medium)
54
+
55
+ - [-1, 1, ADown, [512]]
56
+ - [[-1, 29], 1, Concat, [1]] # cat head P5
57
+ - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)
58
+
59
+ # detect
60
+ - [[35, 38, 41], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/trackers/botsort.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
3
+
4
+ tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
11
+ # mot20: False # for tracker evaluation(not used for now)
12
+
13
+ # BoT-SORT settings
14
+ gmc_method: sparseOptFlow # method of global motion compensation
15
+ # ReID model related thresh (not supported yet)
16
+ proximity_thresh: 0.5
17
+ appearance_thresh: 0.25
18
+ with_reid: False
doclayout_yolo/cfg/trackers/bytetrack.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
3
+
4
+ tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
11
+ # mot20: False # for tracker evaluation(not used for now)
doclayout_yolo/data/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .base import BaseDataset
4
+ from .build import build_dataloader, build_yolo_dataset, load_inference_source
5
+ from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
6
+
7
+ __all__ = (
8
+ "BaseDataset",
9
+ "ClassificationDataset",
10
+ "SemanticDataset",
11
+ "YOLODataset",
12
+ "build_yolo_dataset",
13
+ "build_dataloader",
14
+ "load_inference_source",
15
+ )
doclayout_yolo/data/annotator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from pathlib import Path
4
+
5
+ from doclayout_yolo import SAM, YOLO
6
+
7
+
8
+ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
9
+ """
10
+ Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
11
+
12
+ Args:
13
+ data (str): Path to a folder containing images to be annotated.
14
+ det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
15
+ sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
16
+ device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
17
+ output_dir (str | None | optional): Directory to save the annotated results.
18
+ Defaults to a 'labels' folder in the same directory as 'data'.
19
+
20
+ Example:
21
+ ```python
22
+ from doclayout_yolo.data.annotator import auto_annotate
23
+
24
+ auto_annotate(data='doclayout_yolo/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
25
+ ```
26
+ """
27
+ det_model = YOLO(det_model)
28
+ sam_model = SAM(sam_model)
29
+
30
+ data = Path(data)
31
+ if not output_dir:
32
+ output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
33
+ Path(output_dir).mkdir(exist_ok=True, parents=True)
34
+
35
+ det_results = det_model(data, stream=True, device=device)
36
+
37
+ for result in det_results:
38
+ class_ids = result.boxes.cls.int().tolist() # noqa
39
+ if len(class_ids):
40
+ boxes = result.boxes.xyxy # Boxes object for bbox outputs
41
+ sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
42
+ segments = sam_results[0].masks.xyn # noqa
43
+
44
+ with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
45
+ for i in range(len(segments)):
46
+ s = segments[i]
47
+ if len(s) == 0:
48
+ continue
49
+ segment = map(str, segments[i].reshape(-1).tolist())
50
+ f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")