Adieee5 commited on
Commit
5bd4739
·
verified ·
1 Parent(s): 19dd704

Upload 212 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. doclayout_yolo/.DS_Store +0 -0
  2. doclayout_yolo/__init__.py +27 -0
  3. doclayout_yolo/assets/bus.jpg +3 -0
  4. doclayout_yolo/assets/zidane.jpg +0 -0
  5. doclayout_yolo/cfg/__init__.py +609 -0
  6. doclayout_yolo/cfg/datasets/d4la.yaml +45 -0
  7. doclayout_yolo/cfg/datasets/doclaynet.yaml +28 -0
  8. doclayout_yolo/cfg/datasets/docsynth300k.yaml +91 -0
  9. doclayout_yolo/cfg/default.yaml +127 -0
  10. doclayout_yolo/cfg/models/README.md +40 -0
  11. doclayout_yolo/cfg/models/rt-detr/rtdetr-l.yaml +50 -0
  12. doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet101.yaml +42 -0
  13. doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet50.yaml +42 -0
  14. doclayout_yolo/cfg/models/rt-detr/rtdetr-x.yaml +54 -0
  15. doclayout_yolo/cfg/models/v10/yolov10b.yaml +40 -0
  16. doclayout_yolo/cfg/models/v10/yolov10l.yaml +40 -0
  17. doclayout_yolo/cfg/models/v10/yolov10m-doclayout.yaml +43 -0
  18. doclayout_yolo/cfg/models/v10/yolov10m.yaml +43 -0
  19. doclayout_yolo/cfg/models/v10/yolov10n.yaml +40 -0
  20. doclayout_yolo/cfg/models/v10/yolov10s.yaml +39 -0
  21. doclayout_yolo/cfg/models/v10/yolov10x.yaml +40 -0
  22. doclayout_yolo/cfg/models/v3/yolov3-spp.yaml +46 -0
  23. doclayout_yolo/cfg/models/v3/yolov3-tiny.yaml +37 -0
  24. doclayout_yolo/cfg/models/v3/yolov3.yaml +46 -0
  25. doclayout_yolo/cfg/models/v5/yolov5-p6.yaml +59 -0
  26. doclayout_yolo/cfg/models/v5/yolov5.yaml +48 -0
  27. doclayout_yolo/cfg/models/v6/yolov6.yaml +53 -0
  28. doclayout_yolo/cfg/models/v8/yolov8-cls-resnet101.yaml +25 -0
  29. doclayout_yolo/cfg/models/v8/yolov8-cls-resnet50.yaml +25 -0
  30. doclayout_yolo/cfg/models/v8/yolov8-cls.yaml +29 -0
  31. doclayout_yolo/cfg/models/v8/yolov8-ghost-p2.yaml +54 -0
  32. doclayout_yolo/cfg/models/v8/yolov8-ghost-p6.yaml +56 -0
  33. doclayout_yolo/cfg/models/v8/yolov8-ghost.yaml +47 -0
  34. doclayout_yolo/cfg/models/v8/yolov8-obb.yaml +46 -0
  35. doclayout_yolo/cfg/models/v8/yolov8-p2.yaml +54 -0
  36. doclayout_yolo/cfg/models/v8/yolov8-p6.yaml +56 -0
  37. doclayout_yolo/cfg/models/v8/yolov8-pose-p6.yaml +57 -0
  38. doclayout_yolo/cfg/models/v8/yolov8-pose.yaml +47 -0
  39. doclayout_yolo/cfg/models/v8/yolov8-rtdetr.yaml +46 -0
  40. doclayout_yolo/cfg/models/v8/yolov8-seg-p6.yaml +56 -0
  41. doclayout_yolo/cfg/models/v8/yolov8-seg.yaml +46 -0
  42. doclayout_yolo/cfg/models/v8/yolov8-world.yaml +48 -0
  43. doclayout_yolo/cfg/models/v8/yolov8-worldv2.yaml +46 -0
  44. doclayout_yolo/cfg/models/v8/yolov8.yaml +46 -0
  45. doclayout_yolo/cfg/models/v9/yolov9c.yaml +36 -0
  46. doclayout_yolo/cfg/models/v9/yolov9e.yaml +60 -0
  47. doclayout_yolo/cfg/trackers/botsort.yaml +18 -0
  48. doclayout_yolo/cfg/trackers/bytetrack.yaml +11 -0
  49. doclayout_yolo/data/__init__.py +15 -0
  50. doclayout_yolo/data/annotator.py +50 -0
doclayout_yolo/.DS_Store ADDED
Binary file (6.15 kB). View file
 
doclayout_yolo/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
+ # Package initializer for doclayout_yolo: defines the version and re-exports the public API.
+
+ __version__ = "0.0.2"
+
+ # Model classes and helpers re-exported at package top level.
+ from doclayout_yolo.data.explorer.explorer import Explorer
+ from doclayout_yolo.models import RTDETR, SAM, YOLO, YOLOWorld, YOLOv10
+ from doclayout_yolo.models.fastsam import FastSAM
+ from doclayout_yolo.models.nas import NAS
+ from doclayout_yolo.utils import ASSETS, SETTINGS as settings
+ from doclayout_yolo.utils.checks import check_yolo as checks
+ from doclayout_yolo.utils.downloads import download
+
+ # Explicit public API: names importable via `from doclayout_yolo import *`.
+ # NOTE(review): YOLOWorld is imported above but not listed here — confirm whether
+ # it is intentionally excluded from the public API.
+ __all__ = (
+     "__version__",
+     "ASSETS",
+     "YOLO",
+     "YOLOWorld",
+     "NAS",
+     "SAM",
+     "FastSAM",
+     "RTDETR",
+     "checks",
+     "download",
+     "settings",
+     "Explorer",
+     "YOLOv10"
+ )
doclayout_yolo/assets/bus.jpg ADDED

Git LFS Details

  • SHA256: c02019c4979c191eb739ddd944445ef408dad5679acab6fd520ef9d434bfbc63
  • Pointer size: 131 Bytes
  • Size of remote file: 137 kB
doclayout_yolo/assets/zidane.jpg ADDED
doclayout_yolo/cfg/__init__.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Module-level constants and imports for the doclayout_yolo CLI/configuration system.

import pdb  # NOTE(review): unused debug import left by the author; consider removing

import contextlib
import shutil
import subprocess
import sys
from pathlib import Path
from types import SimpleNamespace
from typing import Dict, List, Union

from doclayout_yolo.utils import (
    ASSETS,
    DEFAULT_CFG,
    DEFAULT_CFG_DICT,
    DEFAULT_CFG_PATH,
    LOGGER,
    RANK,
    ROOT,
    RUNS_DIR,
    SETTINGS,
    SETTINGS_YAML,
    TESTS_RUNNING,
    IterableSimpleNamespace,
    __version__,
    checks,
    colorstr,
    deprecation_warn,
    yaml_load,
    yaml_print,
)

# Define valid tasks and modes
MODES = {"train", "val", "predict", "export", "track", "benchmark"}
TASKS = {"detect", "segment", "classify", "pose", "obb"}

# Default dataset used per task when the CLI omits 'data='.
TASK2DATA = {
    "detect": "coco8.yaml",
    "segment": "coco8-seg.yaml",
    "classify": "imagenet10",
    "pose": "coco8-pose.yaml",
    "obb": "dota8.yaml",
}
# Default pretrained weights used per task when the CLI omits 'model='.
TASK2MODEL = {
    "detect": "yolov8n.pt",
    "segment": "yolov8n-seg.pt",
    "classify": "yolov8n-cls.pt",
    "pose": "yolov8n-pose.pt",
    "obb": "yolov8n-obb.pt",
}
# Headline metric key reported per task.
TASK2METRIC = {
    "detect": "metrics/mAP50-95(B)",
    "segment": "metrics/mAP50-95(M)",
    "classify": "metrics/accuracy_top1",
    "pose": "metrics/mAP50-95(P)",
    "obb": "metrics/mAP50-95(B)",
}

# NOTE: list renumbered — the original message went 1,2,3,4,6,5.
CLI_HELP_MSG = f"""
    Arguments received: {str(['yolo'] + sys.argv[1:])}. Ultralytics 'yolo' commands use the following syntax:

        yolo TASK MODE ARGS

        Where   TASK (optional) is one of {TASKS}
                MODE (required) is one of {MODES}
                ARGS (optional) are any number of custom 'arg=value' pairs like 'imgsz=320' that override defaults.
                    See all ARGS at https://docs.doclayout_yolo.com/usage/cfg or with 'yolo cfg'

    1. Train a detection model for 10 epochs with an initial learning_rate of 0.01
        yolo train data=coco128.yaml model=yolov8n.pt epochs=10 lr0=0.01

    2. Predict a YouTube video using a pretrained segmentation model at image size 320:
        yolo predict model=yolov8n-seg.pt source='https://youtu.be/LNwODJXcvt4' imgsz=320

    3. Val a pretrained detection model at batch-size 1 and image size 640:
        yolo val model=yolov8n.pt data=coco128.yaml batch=1 imgsz=640

    4. Export a YOLOv8n classification model to ONNX format at image size 224 by 128 (no TASK required)
        yolo export model=yolov8n-cls.pt format=onnx imgsz=224,128

    5. Explore your datasets using semantic search and SQL with a simple GUI powered by Ultralytics Explorer API
        yolo explorer

    6. Run special commands:
        yolo help
        yolo checks
        yolo version
        yolo settings
        yolo copy-cfg
        yolo cfg

    Docs: https://docs.doclayout_yolo.com
    Community: https://community.doclayout_yolo.com
    GitHub: https://github.com/doclayout_yolo/doclayout_yolo
    """

# Define keys for arg type checks
CFG_FLOAT_KEYS = {"warmup_epochs", "box", "cls", "dfl", "degrees", "shear", "time"}
# Fraction floats constrained to 0.0 - 1.0.  (Duplicate "iou" entry removed.)
CFG_FRACTION_KEYS = {
    "dropout",
    "iou",
    "lr0",
    "lrf",
    "momentum",
    "weight_decay",
    "warmup_momentum",
    "warmup_bias_lr",
    "label_smoothing",
    "hsv_h",
    "hsv_s",
    "hsv_v",
    "translate",
    "scale",
    "perspective",
    "flipud",
    "fliplr",
    "bgr",
    "mosaic",
    "mixup",
    "copy_paste",
    "conf",
    "fraction",
}  # fraction floats 0.0 - 1.0
CFG_INT_KEYS = {
    "epochs",
    "patience",
    "batch",
    "workers",
    "seed",
    "close_mosaic",
    "mask_ratio",
    "max_det",
    "vid_stride",
    "line_width",
    "workspace",
    "nbs",
    "save_period",
}
CFG_BOOL_KEYS = {
    "save",
    "exist_ok",
    "verbose",
    "deterministic",
    "single_cls",
    "rect",
    "cos_lr",
    "overlap_mask",
    "val",
    "save_json",
    "save_hybrid",
    "half",
    "dnn",
    "plots",
    "show",
    "save_txt",
    "save_conf",
    "save_crop",
    "save_frames",
    "show_labels",
    "show_conf",
    "visualize",
    "augment",
    "agnostic_nms",
    "retina_masks",
    "show_boxes",
    "keras",
    "optimize",
    "int8",
    "dynamic",
    "simplify",
    "nms",
    "profile",
    "multi_scale",
    "fuse",
}
177
+
178
+
179
def cfg2dict(cfg):
    """
    Normalize a configuration object into a plain dictionary.

    Args:
        cfg (str | Path | dict | SimpleNamespace): Configuration source; file paths/strings
            are loaded as YAML, namespaces are converted via vars(), dicts pass through.

    Returns:
        (dict): Configuration in dictionary form.
    """
    if isinstance(cfg, SimpleNamespace):
        return vars(cfg)  # namespace -> dict
    if isinstance(cfg, (str, Path)):
        return yaml_load(cfg)  # YAML file -> dict
    return cfg  # already a dict (or dict-like); returned unchanged
194
+
195
+
196
def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None):
    """
    Load configuration data and merge optional overrides into a namespace.

    Args:
        cfg (str | Path | Dict | SimpleNamespace): Base configuration data.
        overrides (str | Dict | optional): Overrides as a file name or dictionary. Default is None.

    Returns:
        (SimpleNamespace): Training arguments namespace (override values win over base values).
    """
    cfg = cfg2dict(cfg)

    # Merge overrides on top of the base configuration
    if overrides:
        overrides = cfg2dict(overrides)
        if "save_dir" not in cfg:
            overrides.pop("save_dir", None)  # special override keys to ignore
        check_dict_alignment(cfg, overrides)
        cfg = {**cfg, **overrides}  # overrides take precedence

    # Numeric 'project'/'name' values must be strings for path construction
    for key in ("project", "name"):
        if key in cfg and isinstance(cfg[key], (int, float)):
            cfg[key] = str(cfg[key])
    if cfg.get("name") == "model":  # assign model to 'name' arg
        cfg["name"] = cfg.get("model", "").split(".")[0]
        LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

    # Type and value checks
    check_cfg(cfg)

    # Return instance
    return IterableSimpleNamespace(**cfg)
230
+
231
+
232
def check_cfg(cfg, hard=True):
    """
    Validate Ultralytics configuration argument types and values.

    With hard=True an invalid type raises; with hard=False the value is coerced
    in place instead. Fraction keys additionally raise ValueError outside [0, 1].
    """
    for key, value in cfg.items():
        if value is None:  # None values may be from optional args
            continue
        if key in CFG_FLOAT_KEYS:
            if not isinstance(value, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{key}={value}' is of invalid type {type(value).__name__}. "
                        f"Valid '{key}' types are int (i.e. '{key}=0') or float (i.e. '{key}=0.5')"
                    )
                cfg[key] = float(value)
        elif key in CFG_FRACTION_KEYS:
            if not isinstance(value, (int, float)):
                if hard:
                    raise TypeError(
                        f"'{key}={value}' is of invalid type {type(value).__name__}. "
                        f"Valid '{key}' types are int (i.e. '{key}=0') or float (i.e. '{key}=0.5')"
                    )
                cfg[key] = value = float(value)
            # Range check applies regardless of 'hard'
            if not (0.0 <= value <= 1.0):
                raise ValueError(f"'{key}={value}' is an invalid value. Valid '{key}' values are between 0.0 and 1.0.")
        elif key in CFG_INT_KEYS and not isinstance(value, int):
            if hard:
                raise TypeError(
                    f"'{key}={value}' is of invalid type {type(value).__name__}. '{key}' must be an int (i.e. '{key}=8')"
                )
            cfg[key] = int(value)
        elif key in CFG_BOOL_KEYS and not isinstance(value, bool):
            if hard:
                raise TypeError(
                    f"'{key}={value}' is of invalid type {type(value).__name__}. "
                    f"'{key}' must be a bool (i.e. '{key}=True' or '{key}=False')"
                )
            cfg[key] = bool(value)
266
+
267
+
268
def get_save_dir(args, name=None):
    """Return the save directory derived from train/val/predict arguments (args.save_dir wins if set)."""
    explicit = getattr(args, "save_dir", None)
    if explicit:
        return Path(explicit)

    from doclayout_yolo.utils.files import increment_path

    project = args.project or (ROOT.parent / "tests/tmp/runs" if TESTS_RUNNING else RUNS_DIR) / args.task
    run_name = name or args.name or f"{args.mode}"
    # Non-zero DDP ranks must reuse the directory chosen by rank 0, hence exist_ok=True there.
    return Path(increment_path(Path(project) / run_name, exist_ok=args.exist_ok if RANK in (-1, 0) else True))
281
+
282
+
283
+ def _handle_deprecation(custom):
284
+ """Hardcoded function to handle deprecated config keys."""
285
+
286
+ for key in custom.copy().keys():
287
+ if key == "boxes":
288
+ deprecation_warn(key, "show_boxes")
289
+ custom["show_boxes"] = custom.pop("boxes")
290
+ if key == "hide_labels":
291
+ deprecation_warn(key, "show_labels")
292
+ custom["show_labels"] = custom.pop("hide_labels") == "False"
293
+ if key == "hide_conf":
294
+ deprecation_warn(key, "show_conf")
295
+ custom["show_conf"] = custom.pop("hide_conf") == "False"
296
+ if key == "line_thickness":
297
+ deprecation_warn(key, "line_width")
298
+ custom["line_width"] = custom.pop("line_thickness")
299
+
300
+ return custom
301
+
302
+
303
def check_dict_alignment(base: Dict, custom: Dict, e=None):
    """
    Verify that every key in *custom* exists in *base*.

    Args:
        base (dict): a dictionary of base configuration options
        custom (dict): a dictionary of custom configuration options
        e (Error, optional): An optional error that is passed by the calling function.

    Raises:
        SyntaxError: if any key is unknown, with close-match suggestions and the CLI help text.
    """
    custom = _handle_deprecation(custom)
    base_keys = set(base.keys())
    unknown = [k for k in set(custom.keys()) if k not in base_keys]
    if not unknown:
        return
    from difflib import get_close_matches

    message = ""
    for bad in unknown:
        candidates = get_close_matches(bad, base_keys)  # key list
        candidates = [f"{k}={base[k]}" if base.get(k) is not None else k for k in candidates]
        hint = f"Similar arguments are i.e. {candidates}." if candidates else ""
        message += f"'{colorstr('red', 'bold', bad)}' is not a valid YOLO argument. {hint}\n"
    raise SyntaxError(message + CLI_HELP_MSG) from e
326
+
327
+
328
def merge_equals_args(args: List[str]) -> List[str]:
    """
    Merges arguments around isolated '=' args in a list of strings. The function considers cases where the first
    argument ends with '=' or the second starts with '=', as well as when the middle one is an equals sign.

    Args:
        args (List[str]): A list of strings where each element is an argument.

    Returns:
        (List[str]): A list of strings where the arguments around isolated '=' are merged.

    Note:
        This function deletes consumed elements from *args* in place while enumerating it;
        callers that hold a reference to the same list (e.g. closures over 'args') will
        observe the mutation. The deletion also shifts upcoming indices — intentional here,
        since the consumed value must not be revisited.
    """
    new_args = []
    for i, arg in enumerate(args):
        if arg == "=" and 0 < i < len(args) - 1:  # merge ['arg', '=', 'val']
            new_args[-1] += f"={args[i + 1]}"
            del args[i + 1]
        elif arg.endswith("=") and i < len(args) - 1 and "=" not in args[i + 1]:  # merge ['arg=', 'val']
            new_args.append(f"{arg}{args[i + 1]}")
            del args[i + 1]
        elif arg.startswith("=") and i > 0:  # merge ['arg', '=val']
            new_args[-1] += arg
        else:
            new_args.append(arg)
    return new_args
352
+
353
+
354
def handle_yolo_hub(args: List[str]) -> None:
    """
    Process Ultralytics HUB authentication commands from the CLI.

    Args:
        args (List[str]): Command line arguments; args[0] selects 'login' or 'logout',
            and for 'login' args[1] optionally supplies the API key.

    Example:
        ```bash
        python my_script.py hub login your_api_key
        ```
    """
    from doclayout_yolo import hub  # deferred import keeps module import cheap

    action = args[0]
    if action == "login":
        # Log in to Ultralytics HUB using the provided API key (empty string if omitted)
        hub.login(args[1] if len(args) > 1 else "")
    elif action == "logout":
        # Log out from Ultralytics HUB
        hub.logout()
378
+
379
+
380
def handle_yolo_settings(args: List[str]) -> None:
    """
    Process YOLO settings commands from the CLI: 'reset' or 'key=value' updates.

    Args:
        args (List[str]): Command line arguments for settings management; empty just prints them.

    Example:
        ```bash
        python my_script.py yolo settings reset
        ```
    """
    url = "https://docs.doclayout_yolo.com/quickstart/#doclayout_yolo-settings"  # help URL
    try:
        if any(args):
            if args[0] == "reset":
                SETTINGS_YAML.unlink()  # delete the settings file
                SETTINGS.reset()  # create new settings
                LOGGER.info("Settings reset successfully")  # inform the user that settings have been reset
            else:
                # Treat every argument as a 'key=value' pair and persist it
                updates = dict(parse_key_value_pair(kv) for kv in args)
                check_dict_alignment(SETTINGS, updates)
                SETTINGS.update(updates)

        LOGGER.info(f"💡 Learn about settings at {url}")
        yaml_print(SETTINGS_YAML)  # print the current settings
    except Exception as e:
        # Best-effort: settings errors are reported, never fatal
        LOGGER.warning(f"WARNING ⚠️ settings error: '{e}'. Please see {url} for help.")
411
+
412
+
413
def handle_explorer():
    """Open the Ultralytics Explorer GUI (Streamlit dashboard bundled with the package)."""
    checks.check_requirements("streamlit")  # ensure the GUI dependency is installed before launching
    LOGGER.info("💡 Loading Explorer dashboard...")
    # Blocks until the Streamlit process exits; maxMessageSize is raised so large
    # datasets can be streamed to the browser.
    subprocess.run(["streamlit", "run", ROOT / "data/explorer/gui/dash.py", "--server.maxMessageSize", "2048"])
418
+
419
+
420
def parse_key_value_pair(pair):
    """Split one 'key=value' string on its first '=' and return the key with its parsed value."""
    key, value = pair.split("=", 1)  # ValueError if no '=' — caught by the CLI entrypoint
    key = key.strip()
    value = value.strip()
    # Empty values are rejected; the AssertionError is handled upstream in entrypoint()
    assert value, f"missing '{key}' value"
    return key, smart_value(value)
426
+
427
+
428
def smart_value(v):
    """Convert a string to its underlying Python value (None, bool, int, float, ...) when possible."""
    lowered = v.lower()
    if lowered == "none":
        return None
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    # SECURITY NOTE: eval() executes arbitrary expressions. Inputs here come from the
    # local CLI, but ast.literal_eval would be the safer choice for untrusted input.
    with contextlib.suppress(Exception):
        return eval(v)
    return v  # not parseable — keep as the original string
441
+
442
+
443
def entrypoint(debug=""):
    """
    This function is the doclayout_yolo package entrypoint, it's responsible for parsing the command line arguments passed
    to the package.

    This function allows for:
    - passing mandatory YOLO args as a list of strings
    - specifying the task to be performed, either 'detect', 'segment' or 'classify'
    - specifying the mode, either 'train', 'val', 'test', or 'predict'
    - running special modes like 'checks'
    - passing overrides to the package's configuration

    It uses the package's default cfg and initializes it using the passed overrides.
    Then it calls the CLI function with the composed cfg
    """
    # 'debug' simulates a CLI invocation; otherwise the real process argv is used.
    args = (debug.split(" ") if debug else sys.argv)[1:]
    if not args:  # no arguments passed
        LOGGER.info(CLI_HELP_MSG)
        return

    # Special zero-config commands; the lambdas close over 'args', which
    # merge_equals_args() below may mutate in place before they are called.
    special = {
        "help": lambda: LOGGER.info(CLI_HELP_MSG),
        "checks": checks.collect_system_info,
        "version": lambda: LOGGER.info(__version__),
        "settings": lambda: handle_yolo_settings(args[1:]),
        "cfg": lambda: yaml_print(DEFAULT_CFG_PATH),
        "hub": lambda: handle_yolo_hub(args[1:]),
        "login": lambda: handle_yolo_hub(args),
        "copy-cfg": copy_default_cfg,
        "explorer": lambda: handle_explorer(),
    }
    # Superset of every name the CLI accepts, used for typo suggestions.
    full_args_dict = {**DEFAULT_CFG_DICT, **{k: None for k in TASKS}, **{k: None for k in MODES}, **special}

    # Define common misuses of special commands, i.e. -h, -help, --help
    special.update({k[0]: v for k, v in special.items()})  # first-letter shorthand, e.g. 'h' for 'help'
    special.update({k[:-1]: v for k, v in special.items() if len(k) > 1 and k.endswith("s")})  # singular, e.g. 'check'
    special = {**special, **{f"-{k}": v for k, v in special.items()}, **{f"--{k}": v for k, v in special.items()}}

    overrides = {}  # basic overrides, i.e. imgsz=320
    for a in merge_equals_args(args):  # merge spaces around '=' sign
        if a.startswith("--"):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require leading dashes '--', updating to '{a[2:]}'.")
            a = a[2:]
        if a.endswith(","):
            LOGGER.warning(f"WARNING ⚠️ argument '{a}' does not require trailing comma ',', updating to '{a[:-1]}'.")
            a = a[:-1]
        if "=" in a:
            try:
                k, v = parse_key_value_pair(a)
                if k == "cfg" and v is not None:  # custom.yaml passed
                    LOGGER.info(f"Overriding {DEFAULT_CFG_PATH} with {v}")
                    # NOTE: replaces (not merges) any overrides collected so far
                    overrides = {k: val for k, val in yaml_load(checks.check_yaml(v)).items() if k != "cfg"}
                else:
                    overrides[k] = v
            except (NameError, SyntaxError, ValueError, AssertionError) as e:
                # Malformed pair — surface it with close-match suggestions
                check_dict_alignment(full_args_dict, {a: ""}, e)

        elif a in TASKS:
            overrides["task"] = a
        elif a in MODES:
            overrides["mode"] = a
        elif a.lower() in special:
            special[a.lower()]()
            return
        elif a in DEFAULT_CFG_DICT and isinstance(DEFAULT_CFG_DICT[a], bool):
            overrides[a] = True  # auto-True for default bool args, i.e. 'yolo show' sets show=True
        elif a in DEFAULT_CFG_DICT:
            raise SyntaxError(
                f"'{colorstr('red', 'bold', a)}' is a valid YOLO argument but is missing an '=' sign "
                f"to set its value, i.e. try '{a}={DEFAULT_CFG_DICT[a]}'\n{CLI_HELP_MSG}"
            )
        else:
            check_dict_alignment(full_args_dict, {a: ""})

    # Check keys
    check_dict_alignment(full_args_dict, overrides)

    # Mode
    mode = overrides.get("mode")
    if mode is None:
        mode = DEFAULT_CFG.mode or "predict"
        LOGGER.warning(f"WARNING ⚠️ 'mode' argument is missing. Valid modes are {MODES}. Using default 'mode={mode}'.")
    elif mode not in MODES:
        raise ValueError(f"Invalid 'mode={mode}'. Valid modes are {MODES}.\n{CLI_HELP_MSG}")

    # Task
    task = overrides.pop("task", None)
    if task:
        if task not in TASKS:
            raise ValueError(f"Invalid 'task={task}'. Valid tasks are {TASKS}.\n{CLI_HELP_MSG}")
        if "model" not in overrides:
            overrides["model"] = TASK2MODEL[task]

    # Model — architecture guessed from the model name substring.
    model = overrides.pop("model", DEFAULT_CFG.model)
    if model is None:
        model = "yolov8n.pt"
        LOGGER.warning(f"WARNING ⚠️ 'model' argument is missing. Using default 'model={model}'.")
    overrides["model"] = model
    # stem = Path(model).stem.lower()
    stem = model.lower()
    if "rtdetr" in stem:  # guess architecture
        from doclayout_yolo import RTDETR

        model = RTDETR(model)  # no task argument
    elif "fastsam" in stem:  # must precede the plain "sam" check below
        from doclayout_yolo import FastSAM

        model = FastSAM(model)
    elif "sam" in stem:
        from doclayout_yolo import SAM

        model = SAM(model)
    elif "yolov10" in stem:
        from doclayout_yolo import YOLOv10

        model = YOLOv10(model)
    else:
        from doclayout_yolo import YOLO

        model = YOLO(model, task=task)
    if isinstance(overrides.get("pretrained"), str):
        model.load(overrides["pretrained"])

    # Task Update — the model's own task always wins over a conflicting CLI task.
    if task != model.task:
        if task:
            LOGGER.warning(
                f"WARNING ⚠️ conflicting 'task={task}' passed with 'task={model.task}' model. "
                f"Ignoring 'task={task}' and updating to 'task={model.task}' to match model."
            )
        task = model.task

    # Mode — fill in mode-specific required arguments with sensible defaults.
    if mode in ("predict", "track") and "source" not in overrides:
        overrides["source"] = DEFAULT_CFG.source or ASSETS
        LOGGER.warning(f"WARNING ⚠️ 'source' argument is missing. Using default 'source={overrides['source']}'.")
    elif mode in ("train", "val"):
        if "data" not in overrides and "resume" not in overrides:
            overrides["data"] = DEFAULT_CFG.data or TASK2DATA.get(task or DEFAULT_CFG.task, DEFAULT_CFG.data)
            LOGGER.warning(f"WARNING ⚠️ 'data' argument is missing. Using default 'data={overrides['data']}'.")
    elif mode == "export":
        if "format" not in overrides:
            overrides["format"] = DEFAULT_CFG.format or "torchscript"
            LOGGER.warning(f"WARNING ⚠️ 'format' argument is missing. Using default 'format={overrides['format']}'.")

    # Run command in python
    getattr(model, mode)(**overrides)  # default args from model

    # Show help
    LOGGER.info(f"💡 Learn more at https://docs.doclayout_yolo.com/modes/{mode}")
594
+
595
+
596
+ # Special modes --------------------------------------------------------------------------------------------------------
597
def copy_default_cfg():
    """Copy and create a new default configuration file with '_copy' appended to its name."""
    destination = Path.cwd() / DEFAULT_CFG_PATH.name.replace(".yaml", "_copy.yaml")
    shutil.copy2(DEFAULT_CFG_PATH, destination)  # preserves file metadata
    LOGGER.info(
        f"{DEFAULT_CFG_PATH} copied to {destination}\n"
        f"Example YOLO command with this new custom cfg:\n yolo cfg='{destination}' imgsz=320 batch=8"
    )
605
+
606
+
607
if __name__ == "__main__":
    # Example: entrypoint(debug='yolo predict model=yolov8n.pt')
    # Running this module directly invokes the CLI with the real sys.argv.
    entrypoint(debug="")
doclayout_yolo/cfg/datasets/d4la.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # D4LA document layout analysis dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ # path: /mnt/hwfile/opendatalab/zhaozhiyuan/yolov10/D4LA # dataset root dir
12
+ path: ./layout_data/D4LA
13
+ train: train.txt # train images (relative to 'path')
+ val: test.txt # val images (relative to 'path'); D4LA reuses the test split for validation
+ test: test.txt # test images (relative to 'path')
16
+
17
+ # Classes
18
+ names:
19
+ 0: "DocTitle"
20
+ 1: "ParaTitle"
21
+ 2: "ParaText"
22
+ 3: "ListText"
23
+ 4: "RegionTitle"
24
+ 5: "Date"
25
+ 6: "LetterHead"
26
+ 7: "LetterDear"
27
+ 8: "LetterSign"
28
+ 9: "Question"
29
+ 10: "OtherText"
30
+ 11: "RegionKV"
31
+ 12: "RegionList"
32
+ 13: "Abstract"
33
+ 14: "Author"
34
+ 15: "TableName"
35
+ 16: "Table"
36
+ 17: "Figure"
37
+ 18: "FigureName"
38
+ 19: "Equation"
39
+ 20: "Reference"
40
+ 21: "Footer"
41
+ 22: "PageHeader"
42
+ 23: "PageFooter"
43
+ 24: "Number"
44
+ 25: "Catalog"
45
+ 26: "PageNumber"
doclayout_yolo/cfg/datasets/doclaynet.yaml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DocLayNet document layout analysis dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ./layout_data/doclaynet # dataset root dir
12
+ train: train.txt # train images (relative to 'path')
+ val: val.txt # val images (relative to 'path')
+ test: val.txt # test images (relative to 'path'); DocLayNet reuses the val split for testing
15
+
16
+ # Classes
17
+ names:
18
+ 0: Caption
19
+ 1: Footnote
20
+ 2: Formula
21
+ 3: List-item
22
+ 4: Page-footer
23
+ 5: Page-header
24
+ 6: Picture
25
+ 7: Section-header
26
+ 8: Table
27
+ 9: Text
28
+ 10: Title
doclayout_yolo/cfg/datasets/docsynth300k.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # DocSynth300K synthetic document layout pretraining dataset
3
+ # Documentation: https://docs.doclayout_yolo.com/datasets/detect/coco/
4
+ # Example usage: yolo train data=coco.yaml
5
+ # parent
6
+ # ├── doclayout_yolo
7
+ # └── datasets
8
+ # └── coco ← downloads here (20.1 GB)
9
+
10
+ # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
11
+ path: ./layout_data/docsynth300k # dataset root dir
12
+ train: train300k.txt # train images (relative to 'path')
+ val: val.txt # val images (relative to 'path')
+ test: val.txt # test images (relative to 'path'); the val split is reused for testing
15
+
16
+ # Classes
17
+ names:
18
+ 0: 'QR code'
19
+ 1: 'advertisement'
20
+ 2: 'algorithm'
21
+ 3: 'answer'
22
+ 4: 'author'
23
+ 5: 'barcode'
24
+ 6: 'bill'
25
+ 7: 'blank'
26
+ 8: 'bracket'
27
+ 9: 'breakout'
28
+ 10: 'byline'
29
+ 11: 'caption'
30
+ 12: 'catalogue'
31
+ 13: 'chapter title'
32
+ 14: 'code'
33
+ 15: 'correction'
34
+ 16: 'credit'
35
+ 17: 'dateline'
36
+ 18: 'drop cap'
37
+ 19: "editor's note"
38
+ 20: 'endnote'
39
+ 21: 'examinee information'
40
+ 22: 'fifth-level title'
41
+ 23: 'figure'
42
+ 24: 'first-level question number'
43
+ 25: 'first-level title'
44
+ 26: 'flag'
45
+ 27: 'folio'
46
+ 28: 'footer'
47
+ 29: 'footnote'
48
+ 30: 'formula'
49
+ 31: 'fourth-level section title'
50
+ 32: 'fourth-level title'
51
+ 33: 'header'
52
+ 34: 'headline'
53
+ 35: 'index'
54
+ 36: 'inside'
55
+ 37: 'institute'
56
+ 38: 'jump line'
57
+ 39: 'kicker'
58
+ 40: 'lead'
59
+ 41: 'marginal note'
60
+ 42: 'matching'
61
+ 43: 'mugshot'
62
+ 44: 'option'
63
+ 45: 'ordered list'
64
+ 46: 'other question number'
65
+ 47: 'page number'
66
+ 48: 'paragraph'
67
+ 49: 'part'
68
+ 50: 'play'
69
+ 51: 'poem'
70
+ 52: 'reference'
71
+ 53: 'sealing line'
72
+ 54: 'second-level question number'
73
+ 55: 'second-level title'
74
+ 56: 'section'
75
+ 57: 'section title'
76
+ 58: 'sidebar'
77
+ 59: 'sub section title'
78
+ 60: 'subhead'
79
+ 61: 'subsub section title'
80
+ 62: 'supplementary note'
81
+ 63: 'table'
82
+ 64: 'table caption'
83
+ 65: 'table note'
84
+ 66: 'teasers'
85
+ 67: 'third-level question number'
86
+ 68: 'third-level title'
87
+ 69: 'title'
88
+ 70: 'translator'
89
+ 71: 'underscore'
90
+ 72: 'unordered list'
91
+ 73: 'weather forecast'
doclayout_yolo/cfg/default.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default training settings and hyperparameters for medium-augmentation COCO training
3
+
4
+ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
5
+ mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
6
+
7
+ # Train settings -------------------------------------------------------------------------------------------------------
8
+ model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
9
+ data: # (str, optional) path to data file, i.e. coco128.yaml
10
+ epochs: 100 # (int) number of epochs to train for
11
+ time: # (float, optional) number of hours to train for, overrides epochs if supplied
12
+ patience: 100 # (int) epochs to wait for no observable improvement for early stopping of training
13
+ batch: 16 # (int) number of images per batch (-1 for AutoBatch)
14
+ imgsz: 640 # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
15
+ save: True # (bool) save train checkpoints and predict results
16
+ save_period: 10 # (int) Save checkpoint every x epochs (disabled if < 1)
17
+ val_period: 1 # (int) Validation every x epochs
18
+ cache: False # (bool) True/ram, disk or False. Use cache for data loading
19
+ device: # (int | str | list, optional) device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
20
+ workers: 8 # (int) number of worker threads for data loading (per RANK if DDP)
21
+ project: # (str, optional) project name
22
+ name: # (str, optional) experiment name, results saved to 'project/name' directory
23
+ exist_ok: True # (bool) whether to overwrite existing experiment
24
+ pretrained: True # (bool | str) whether to use a pretrained model (bool) or a model to load weights from (str)
25
+ optimizer: auto # (str) optimizer to use, choices=[SGD, Adam, Adamax, AdamW, NAdam, RAdam, RMSProp, auto]
26
+ verbose: True # (bool) whether to print verbose output
27
+ seed: 0 # (int) random seed for reproducibility
28
+ deterministic: True # (bool) whether to enable deterministic mode
29
+ single_cls: False # (bool) train multi-class data as single-class
30
+ rect: False # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
31
+ cos_lr: False # (bool) use cosine learning rate scheduler
32
+ close_mosaic: 10 # (int) disable mosaic augmentation for final epochs (0 to disable)
33
+ resume: False # (bool) resume training from last checkpoint
34
+ amp: True # (bool) Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
35
+ fraction: 1.0 # (float) dataset fraction to train on (default is 1.0, all images in train set)
36
+ profile: False # (bool) profile ONNX and TensorRT speeds during training for loggers
37
+ freeze: None # (int | list, optional) freeze first n layers, or freeze list of layer indices during training
38
+ multi_scale: False # (bool) Whether to use multiscale during training
39
+ # Segmentation
40
+ overlap_mask: True # (bool) masks should overlap during training (segment train only)
41
+ mask_ratio: 4 # (int) mask downsample ratio (segment train only)
42
+ # Classification
43
+ dropout: 0.0 # (float) use dropout regularization (classify train only)
44
+
45
+ # Val/Test settings ----------------------------------------------------------------------------------------------------
46
+ val: True # (bool) validate/test during training
47
+ split: val # (str) dataset split to use for validation, i.e. 'val', 'test' or 'train'
48
+ save_json: False # (bool) save results to JSON file
49
+ save_hybrid: False # (bool) save hybrid version of labels (labels + additional predictions)
50
+ conf: # (float, optional) object confidence threshold for detection (default 0.25 predict, 0.001 val)
51
+ iou: 0.7 # (float) intersection over union (IoU) threshold for NMS
52
+ max_det: 300 # (int) maximum number of detections per image
53
+ half: False # (bool) use half precision (FP16)
54
+ dnn: False # (bool) use OpenCV DNN for ONNX inference
55
+ plots: True # (bool) save plots and images during train/val
56
+
57
+ # Predict settings -----------------------------------------------------------------------------------------------------
58
+ source: # (str, optional) source directory for images or videos
59
+ vid_stride: 1 # (int) video frame-rate stride
60
+ stream_buffer: False # (bool) buffer all streaming frames (True) or return the most recent frame (False)
61
+ visualize: False # (bool) visualize model features
62
+ augment: False # (bool) apply image augmentation to prediction sources
63
+ agnostic_nms: False # (bool) class-agnostic NMS
64
+ classes: # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
65
+ retina_masks: False # (bool) use high-resolution segmentation masks
66
+ embed: # (list[int], optional) return feature vectors/embeddings from given layers
67
+
68
+ # Visualize settings ---------------------------------------------------------------------------------------------------
69
+ show: False # (bool) show predicted images and videos if environment allows
70
+ save_frames: False # (bool) save predicted individual video frames
71
+ save_txt: False # (bool) save results as .txt file
72
+ save_conf: False # (bool) save results with confidence scores
73
+ save_crop: False # (bool) save cropped images with results
74
+ show_labels: True # (bool) show prediction labels, i.e. 'person'
75
+ show_conf: True # (bool) show prediction confidence, i.e. '0.99'
76
+ show_boxes: True # (bool) show prediction boxes
77
+ line_width: # (int, optional) line width of the bounding boxes. Scaled to image size if None.
78
+
79
+ # Export settings ------------------------------------------------------------------------------------------------------
80
+ format: torchscript # (str) format to export to, choices at https://docs.doclayout_yolo.com/modes/export/#export-formats
81
+ keras: False # (bool) use Keras
82
+ optimize: False # (bool) TorchScript: optimize for mobile
83
+ int8: False # (bool) CoreML/TF INT8 quantization
84
+ dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
85
+ simplify: False # (bool) ONNX: simplify model
86
+ opset: # (int, optional) ONNX: opset version
87
+ workspace: 4 # (int) TensorRT: workspace size (GB)
88
+ nms: False # (bool) CoreML: add NMS
89
+
90
+ # Hyperparameters ------------------------------------------------------------------------------------------------------
91
+ lr0: 0.01 # (float) initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
92
+ lrf: 0.01 # (float) final learning rate (lr0 * lrf)
93
+ momentum: 0.937 # (float) SGD momentum/Adam beta1
94
+ weight_decay: 0.0005 # (float) optimizer weight decay 5e-4
95
+ warmup_epochs: 3.0 # (float) warmup epochs (fractions ok)
96
+ warmup_momentum: 0.8 # (float) warmup initial momentum
97
+ warmup_bias_lr: 0.1 # (float) warmup initial bias lr
98
+ box: 7.5 # (float) box loss gain
99
+ cls: 0.5 # (float) cls loss gain (scale with pixels)
100
+ dfl: 1.5 # (float) dfl loss gain
101
+ pose: 12.0 # (float) pose loss gain
102
+ kobj: 1.0 # (float) keypoint obj loss gain
103
+ label_smoothing: 0.0 # (float) label smoothing (fraction)
104
+ nbs: 64 # (int) nominal batch size
105
+ hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
106
+ hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
107
+ hsv_v: 0.4 # (float) image HSV-Value augmentation (fraction)
108
+ degrees: 0.0 # (float) image rotation (+/- deg)
109
+ translate: 0.1 # (float) image translation (+/- fraction)
110
+ scale: 0.5 # (float) image scale (+/- gain)
111
+ shear: 0.0 # (float) image shear (+/- deg)
112
+ perspective: 0.0 # (float) image perspective (+/- fraction), range 0-0.001
113
+ flipud: 0.0 # (float) image flip up-down (probability)
114
+ fliplr: 0.5 # (float) image flip left-right (probability)
115
+ bgr: 0.0 # (float) image channel BGR (probability)
116
+ mosaic: 1.0 # (float) image mosaic (probability)
117
+ mixup: 0.0 # (float) image mixup (probability)
118
+ copy_paste: 0.0 # (float) segment copy-paste (probability)
119
+ auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
120
+ erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
121
+ crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)
122
+
123
+ # Custom config.yaml ---------------------------------------------------------------------------------------------------
124
+ cfg: # (str, optional) for overriding defaults.yaml
125
+
126
+ # Tracker settings ------------------------------------------------------------------------------------------------------
127
+ tracker: botsort.yaml # (str) tracker type, choices=[botsort.yaml, bytetrack.yaml]
doclayout_yolo/cfg/models/README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Models
2
+
3
+ Welcome to the Ultralytics Models directory! Here you will find a wide variety of pre-configured model configuration files (`*.yaml`s) that can be used to create custom YOLO models. The models in this directory have been expertly crafted and fine-tuned by the Ultralytics team to provide the best performance for a wide range of object detection and image segmentation tasks.
4
+
5
+ These model configurations cover a wide range of scenarios, from simple object detection to more complex tasks like instance segmentation and object tracking. They are also designed to run efficiently on a variety of hardware platforms, from CPUs to GPUs. Whether you are a seasoned machine learning practitioner or just getting started with YOLO, this directory provides a great starting point for your custom model development needs.
6
+
7
+ To get started, simply browse through the models in this directory and find one that best suits your needs. Once you've selected a model, you can use the provided `*.yaml` file to train and deploy your custom YOLO model with ease. See full details at the Ultralytics [Docs](https://docs.doclayout_yolo.com/models), and if you need help or have any questions, feel free to reach out to the Ultralytics team for support. So, don't wait, start creating your custom YOLO model now!
8
+
9
+ ### Usage
10
+
11
+ Model `*.yaml` files may be used directly in the Command Line Interface (CLI) with a `yolo` command:
12
+
13
+ ```bash
14
+ yolo task=detect mode=train model=yolov8n.yaml data=coco128.yaml epochs=100
15
+ ```
16
+
17
+ They may also be used directly in a Python environment, and accept the same [arguments](https://docs.doclayout_yolo.com/usage/cfg/) as in the CLI example above:
18
+
19
+ ```python
20
+ from doclayout_yolo import YOLO
21
+
22
+ model = YOLO("model.yaml") # build a YOLOv8n model from scratch
23
+ # YOLO("model.pt") use pre-trained model if available
24
+ model.info() # display model information
25
+ model.train(data="coco128.yaml", epochs=100) # train the model
26
+ ```
27
+
28
+ ## Pre-trained Model Architectures
29
+
30
+ Ultralytics supports many model architectures. Visit https://docs.doclayout_yolo.com/models to view detailed information and usage. Any of these models can be used by loading their configs or pretrained checkpoints if available.
31
+
32
+ ## Contribute New Models
33
+
34
+ Have you trained a new YOLO variant or achieved state-of-the-art performance with specific tuning? We'd love to showcase your work in our Models section! Contributions from the community in the form of new models, architectures, or optimizations are highly valued and can significantly enrich our repository.
35
+
36
+ By contributing to this section, you're helping us offer a wider array of model choices and configurations to the community. It's a fantastic way to share your knowledge and expertise while making the Ultralytics YOLO ecosystem even more versatile.
37
+
38
+ To get started, please consult our [Contributing Guide](https://docs.doclayout_yolo.com/help/contributing) for step-by-step instructions on how to submit a Pull Request (PR) 🛠️. Your contributions are eagerly awaited!
39
+
40
+ Let's join hands to extend the range and capabilities of the Ultralytics YOLO models 🙏!
doclayout_yolo/cfg/models/rt-detr/rtdetr-l.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 48]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [48, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [96, 512, 3]] # stage 2
17
+
18
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P3/16
19
+ - [-1, 6, HGBlock, [192, 1024, 5, True, False]] # cm, c2, k, light, shortcut
20
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]]
21
+ - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3
22
+
23
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P4/32
24
+ - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4
25
+
26
+ head:
27
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
28
+ - [-1, 1, AIFI, [1024, 8]]
29
+ - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
33
+ - [[-2, -1], 1, Concat, [1]]
34
+ - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
35
+ - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
38
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
39
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1
41
+
42
+ - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
43
+ - [[-1, 17], 1, Concat, [1]] # cat Y4
44
+ - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
47
+ - [[-1, 12], 1, Concat, [1]] # cat Y5
48
+ - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1
49
+
50
+ - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet101.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet101 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-resnet50.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-ResNet50 object detection model with P3-P5 outputs.
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ l: [1.00, 1.00, 1024]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0
13
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1
14
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2
15
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3
16
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4
17
+
18
+ head:
19
+ - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 5
20
+ - [-1, 1, AIFI, [1024, 8]]
21
+ - [-1, 1, Conv, [256, 1, 1]] # 7
22
+
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 9
25
+ - [[-2, -1], 1, Concat, [1]]
26
+ - [-1, 3, RepC3, [256]] # 11
27
+ - [-1, 1, Conv, [256, 1, 1]] # 12
28
+
29
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
30
+ - [2, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14
31
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, RepC3, [256]] # X3 (16), fpn_blocks.1
33
+
34
+ - [-1, 1, Conv, [256, 3, 2]] # 17, downsample_convs.0
35
+ - [[-1, 12], 1, Concat, [1]] # cat Y4
36
+ - [-1, 3, RepC3, [256]] # F4 (19), pan_blocks.0
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]] # 20, downsample_convs.1
39
+ - [[-1, 7], 1, Concat, [1]] # cat Y5
40
+ - [-1, 3, RepC3, [256]] # F5 (22), pan_blocks.1
41
+
42
+ - [[16, 19, 22], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/rt-detr/rtdetr-x.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # RT-DETR-x object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/rtdetr
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ x: [1.00, 1.00, 2048]
9
+
10
+ backbone:
11
+ # [from, repeats, module, args]
12
+ - [-1, 1, HGStem, [32, 64]] # 0-P2/4
13
+ - [-1, 6, HGBlock, [64, 128, 3]] # stage 1
14
+
15
+ - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
16
+ - [-1, 6, HGBlock, [128, 512, 3]]
17
+ - [-1, 6, HGBlock, [128, 512, 3, False, True]] # 4-stage 2
18
+
19
+ - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 5-P3/16
20
+ - [-1, 6, HGBlock, [256, 1024, 5, True, False]] # cm, c2, k, light, shortcut
21
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
22
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
23
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]]
24
+ - [-1, 6, HGBlock, [256, 1024, 5, True, True]] # 10-stage 3
25
+
26
+ - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 11-P4/32
27
+ - [-1, 6, HGBlock, [512, 2048, 5, True, False]]
28
+ - [-1, 6, HGBlock, [512, 2048, 5, True, True]] # 13-stage 4
29
+
30
+ head:
31
+ - [-1, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 14 input_proj.2
32
+ - [-1, 1, AIFI, [2048, 8]]
33
+ - [-1, 1, Conv, [384, 1, 1]] # 16, Y5, lateral_convs.0
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [10, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 18 input_proj.1
37
+ - [[-2, -1], 1, Concat, [1]]
38
+ - [-1, 3, RepC3, [384]] # 20, fpn_blocks.0
39
+ - [-1, 1, Conv, [384, 1, 1]] # 21, Y4, lateral_convs.1
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [4, 1, Conv, [384, 1, 1, None, 1, 1, False]] # 23 input_proj.0
43
+ - [[-2, -1], 1, Concat, [1]] # cat backbone P4
44
+ - [-1, 3, RepC3, [384]] # X3 (25), fpn_blocks.1
45
+
46
+ - [-1, 1, Conv, [384, 3, 2]] # 26, downsample_convs.0
47
+ - [[-1, 21], 1, Concat, [1]] # cat Y4
48
+ - [-1, 3, RepC3, [384]] # F4 (28), pan_blocks.0
49
+
50
+ - [-1, 1, Conv, [384, 3, 2]] # 29, downsample_convs.1
51
+ - [[-1, 16], 1, Concat, [1]] # cat Y5
52
+ - [-1, 3, RepC3, [384]] # F5 (31), pan_blocks.1
53
+
54
+ - [[25, 28, 31], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10b.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ b: [0.67, 1.00, 512]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10l.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10m-doclayout.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
9
+
10
+ # YOLOv8.0n backbone
11
+ backbone:
12
+ # [from, repeats, module, args]
13
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
14
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
15
+ - [-1, 3, C2f, [128, True]]
16
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
17
+ - [-1, 6, G2L_CRM, [256, True, True, [1,2,3], 5, "glu"]]
18
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
19
+ - [-1, 6, G2L_CRM, [512, True, True, [1,3,5], 3, "glu"]]
20
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
21
+ - [-1, 6, G2L_CRM, [1024, True, False]]
22
+ - [-1, 1, SPPF, [1024, 5]] # 9
23
+ - [-1, 1, PSA, [1024]] # 10
24
+
25
+ # YOLOv8.0n head
26
+ head:
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
29
+ - [-1, 3, C2f, [512]] # 13
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
33
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
34
+
35
+ - [-1, 1, Conv, [256, 3, 2]]
36
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
37
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
38
+
39
+ - [-1, 1, SCDown, [512, 3, 2]]
40
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
41
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
42
+
43
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10m.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
9
+
10
+ # YOLOv8.0n backbone
11
+ backbone:
12
+ # [from, repeats, module, args]
13
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
14
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
15
+ - [-1, 3, C2f, [128, True]]
16
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
17
+ - [-1, 6, C2f, [256, True]]
18
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
19
+ - [-1, 6, C2f, [512, True]]
20
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
21
+ - [-1, 3, C2fCIB, [1024, True]]
22
+ - [-1, 1, SPPF, [1024, 5]] # 9
23
+ - [-1, 1, PSA, [1024]] # 10
24
+
25
+ # YOLOv8.0n head
26
+ head:
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
29
+ - [-1, 3, C2f, [512]] # 13
30
+
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
33
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
34
+
35
+ - [-1, 1, Conv, [256, 3, 2]]
36
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
37
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
38
+
39
+ - [-1, 1, SCDown, [512, 3, 2]]
40
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
41
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
42
+
43
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10n.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ n: [0.33, 0.25, 1024]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2f, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2f, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2f, [512]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10s.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ s: [0.33, 0.50, 1024]
6
+
7
+ backbone:
8
+ # [from, repeats, module, args]
9
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
10
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
11
+ - [-1, 3, C2f, [128, True]]
12
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
13
+ - [-1, 6, C2f, [256, True]]
14
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
15
+ - [-1, 6, C2f, [512, True]]
16
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
17
+ - [-1, 3, C2fCIB, [1024, True, True]]
18
+ - [-1, 1, SPPF, [1024, 5]] # 9
19
+ - [-1, 1, PSA, [1024]] # 10
20
+
21
+ # YOLOv8.0n head
22
+ head:
23
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
24
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
25
+ - [-1, 3, C2f, [512]] # 13
26
+
27
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
28
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
29
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
30
+
31
+ - [-1, 1, Conv, [256, 3, 2]]
32
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
33
+ - [-1, 3, C2f, [512]] # 19 (P4/16-medium)
34
+
35
+ - [-1, 1, SCDown, [512, 3, 2]]
36
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
37
+ - [-1, 3, C2fCIB, [1024, True, True]] # 22 (P5/32-large)
38
+
39
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v10/yolov10x.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Parameters
2
+ nc: 80 # number of classes
3
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
4
+ # [depth, width, max_channels]
5
+ x: [1.00, 1.25, 512]
6
+
7
+ # YOLOv8.0n backbone
8
+ backbone:
9
+ # [from, repeats, module, args]
10
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
11
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
12
+ - [-1, 3, C2f, [128, True]]
13
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
14
+ - [-1, 6, C2f, [256, True]]
15
+ - [-1, 1, SCDown, [512, 3, 2]] # 5-P4/16
16
+ - [-1, 6, C2fCIB, [512, True]]
17
+ - [-1, 1, SCDown, [1024, 3, 2]] # 7-P5/32
18
+ - [-1, 3, C2fCIB, [1024, True]]
19
+ - [-1, 1, SPPF, [1024, 5]] # 9
20
+ - [-1, 1, PSA, [1024]] # 10
21
+
22
+ # YOLOv8.0n head
23
+ head:
24
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
25
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
26
+ - [-1, 3, C2fCIB, [512, True]] # 13
27
+
28
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
29
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
30
+ - [-1, 3, C2f, [256]] # 16 (P3/8-small)
31
+
32
+ - [-1, 1, Conv, [256, 3, 2]]
33
+ - [[-1, 13], 1, Concat, [1]] # cat head P4
34
+ - [-1, 3, C2fCIB, [512, True]] # 19 (P4/16-medium)
35
+
36
+ - [-1, 1, SCDown, [512, 3, 2]]
37
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
38
+ - [-1, 3, C2fCIB, [1024, True]] # 22 (P5/32-large)
39
+
40
+ - [[16, 19, 22], 1, v10Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v3/yolov3-spp.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-SPP object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [32, 3, 1]] # 0
13
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
14
+ - [-1, 1, Bottleneck, [64]]
15
+ - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
16
+ - [-1, 2, Bottleneck, [128]]
17
+ - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
18
+ - [-1, 8, Bottleneck, [256]]
19
+ - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
20
+ - [-1, 8, Bottleneck, [512]]
21
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
22
+ - [-1, 4, Bottleneck, [1024]] # 10
23
+
24
+ # YOLOv3-SPP head
25
+ head:
26
+ - [-1, 1, Bottleneck, [1024, False]]
27
+ - [-1, 1, SPP, [512, [5, 9, 13]]]
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [512, 1, 1]]
30
+ - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [256, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Bottleneck, [512, False]]
36
+ - [-1, 1, Bottleneck, [512, False]]
37
+ - [-1, 1, Conv, [256, 1, 1]]
38
+ - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
39
+
40
+ - [-2, 1, Conv, [128, 1, 1]]
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 1, Bottleneck, [256, False]]
44
+ - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
45
+
46
+ - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v3/yolov3-tiny.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3-tiny object detection model with P4-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # YOLOv3-tiny backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [16, 3, 1]] # 0
13
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 1-P1/2
14
+ - [-1, 1, Conv, [32, 3, 1]]
15
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 3-P2/4
16
+ - [-1, 1, Conv, [64, 3, 1]]
17
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 5-P3/8
18
+ - [-1, 1, Conv, [128, 3, 1]]
19
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 7-P4/16
20
+ - [-1, 1, Conv, [256, 3, 1]]
21
+ - [-1, 1, nn.MaxPool2d, [2, 2, 0]] # 9-P5/32
22
+ - [-1, 1, Conv, [512, 3, 1]]
23
+ - [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]] # 11
24
+ - [-1, 1, nn.MaxPool2d, [2, 1, 0]] # 12
25
+
26
+ # YOLOv3-tiny head
27
+ head:
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [256, 1, 1]]
30
+ - [-1, 1, Conv, [512, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [128, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Conv, [256, 3, 1]] # 19 (P4/16-medium)
36
+
37
+ - [[19, 15], 1, Detect, [nc]] # Detect(P4, P5)
doclayout_yolo/cfg/models/v3/yolov3.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv3 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov3
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ depth_multiple: 1.0 # model depth multiple
7
+ width_multiple: 1.0 # layer channel multiple
8
+
9
+ # darknet53 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ - [-1, 1, Conv, [32, 3, 1]] # 0
13
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
14
+ - [-1, 1, Bottleneck, [64]]
15
+ - [-1, 1, Conv, [128, 3, 2]] # 3-P2/4
16
+ - [-1, 2, Bottleneck, [128]]
17
+ - [-1, 1, Conv, [256, 3, 2]] # 5-P3/8
18
+ - [-1, 8, Bottleneck, [256]]
19
+ - [-1, 1, Conv, [512, 3, 2]] # 7-P4/16
20
+ - [-1, 8, Bottleneck, [512]]
21
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P5/32
22
+ - [-1, 4, Bottleneck, [1024]] # 10
23
+
24
+ # YOLOv3 head
25
+ head:
26
+ - [-1, 1, Bottleneck, [1024, False]]
27
+ - [-1, 1, Conv, [512, 1, 1]]
28
+ - [-1, 1, Conv, [1024, 3, 1]]
29
+ - [-1, 1, Conv, [512, 1, 1]]
30
+ - [-1, 1, Conv, [1024, 3, 1]] # 15 (P5/32-large)
31
+
32
+ - [-2, 1, Conv, [256, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P4
35
+ - [-1, 1, Bottleneck, [512, False]]
36
+ - [-1, 1, Bottleneck, [512, False]]
37
+ - [-1, 1, Conv, [256, 1, 1]]
38
+ - [-1, 1, Conv, [512, 3, 1]] # 22 (P4/16-medium)
39
+
40
+ - [-2, 1, Conv, [128, 1, 1]]
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 1, Bottleneck, [256, False]]
44
+ - [-1, 2, Bottleneck, [256, False]] # 27 (P3/8-small)
45
+
46
+ - [[27, 22, 15], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v5/yolov5-p6.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P6 outputs. For details see https://docs.doclayout_yolo.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n-p6.yaml' will call yolov5-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3, [128]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3, [256]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 9, C3, [512]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3, [768]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3, [1024]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv5 v6.0 head
31
+ head:
32
+ - [-1, 1, Conv, [768, 1, 1]]
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C3, [768, False]] # 15
36
+
37
+ - [-1, 1, Conv, [512, 1, 1]]
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
40
+ - [-1, 3, C3, [512, False]] # 19
41
+
42
+ - [-1, 1, Conv, [256, 1, 1]]
43
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
44
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
45
+ - [-1, 3, C3, [256, False]] # 23 (P3/8-small)
46
+
47
+ - [-1, 1, Conv, [256, 3, 2]]
48
+ - [[-1, 20], 1, Concat, [1]] # cat head P4
49
+ - [-1, 3, C3, [512, False]] # 26 (P4/16-medium)
50
+
51
+ - [-1, 1, Conv, [512, 3, 2]]
52
+ - [[-1, 16], 1, Concat, [1]] # cat head P5
53
+ - [-1, 3, C3, [768, False]] # 29 (P5/32-large)
54
+
55
+ - [-1, 1, Conv, [768, 3, 2]]
56
+ - [[-1, 12], 1, Concat, [1]] # cat head P6
57
+ - [-1, 3, C3, [1024, False]] # 32 (P6/64-xlarge)
58
+
59
+ - [[23, 26, 29, 32], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v5/yolov5.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv5 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/models/yolov5
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov5n.yaml' will call yolov5.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.33, 1.25, 1024]
13
+
14
+ # YOLOv5 v6.0 backbone
15
+ backbone:
16
+ # [from, number, module, args]
17
+ - [-1, 1, Conv, [64, 6, 2, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3, [128]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3, [256]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 9, C3, [512]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3, [1024]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv5 v6.0 head
29
+ head:
30
+ - [-1, 1, Conv, [512, 1, 1]]
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3, [512, False]] # 13
34
+
35
+ - [-1, 1, Conv, [256, 1, 1]]
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
38
+ - [-1, 3, C3, [256, False]] # 17 (P3/8-small)
39
+
40
+ - [-1, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 14], 1, Concat, [1]] # cat head P4
42
+ - [-1, 3, C3, [512, False]] # 20 (P4/16-medium)
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
46
+ - [-1, 3, C3, [1024, False]] # 23 (P5/32-large)
47
+
48
+ - [[17, 20, 23], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v6/yolov6.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv6 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/models/yolov6
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ activation: nn.ReLU() # (optional) model default activation function
7
+ scales: # model compound scaling constants, i.e. 'model=yolov6n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv6-3.0s backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 6, Conv, [128, 3, 1]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 12, Conv, [256, 3, 1]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 18, Conv, [512, 3, 1]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 6, Conv, [1024, 3, 1]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv6-3.0s head
30
+ head:
31
+ - [-1, 1, Conv, [256, 1, 1]]
32
+ - [-1, 1, nn.ConvTranspose2d, [256, 2, 2, 0]]
33
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
34
+ - [-1, 1, Conv, [256, 3, 1]]
35
+ - [-1, 9, Conv, [256, 3, 1]] # 14
36
+
37
+ - [-1, 1, Conv, [128, 1, 1]]
38
+ - [-1, 1, nn.ConvTranspose2d, [128, 2, 2, 0]]
39
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
40
+ - [-1, 1, Conv, [128, 3, 1]]
41
+ - [-1, 9, Conv, [128, 3, 1]] # 19
42
+
43
+ - [-1, 1, Conv, [128, 3, 2]]
44
+ - [[-1, 15], 1, Concat, [1]] # cat head P4
45
+ - [-1, 1, Conv, [256, 3, 1]]
46
+ - [-1, 9, Conv, [256, 3, 1]] # 23
47
+
48
+ - [-1, 1, Conv, [256, 3, 2]]
49
+ - [[-1, 10], 1, Concat, [1]] # cat head P5
50
+ - [-1, 1, Conv, [512, 3, 1]]
51
+ - [-1, 9, Conv, [512, 3, 1]] # 27
52
+
53
+ - [[19, 23, 27], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-cls-resnet101.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
18
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
19
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
20
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 23]] # 3-P4/16
21
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
22
+
23
+ # YOLOv8.0n head
24
+ head:
25
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-cls-resnet50.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, ResNetLayer, [3, 64, 1, True, 1]] # 0-P1/2
18
+ - [-1, 1, ResNetLayer, [64, 64, 1, False, 3]] # 1-P2/4
19
+ - [-1, 1, ResNetLayer, [256, 128, 2, False, 4]] # 2-P3/8
20
+ - [-1, 1, ResNetLayer, [512, 256, 2, False, 6]] # 3-P4/16
21
+ - [-1, 1, ResNetLayer, [1024, 512, 2, False, 3]] # 4-P5/32
22
+
23
+ # YOLOv8.0n head
24
+ head:
25
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-cls.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-cls image classification model. For Usage examples see https://docs.doclayout_yolo.com/tasks/classify
3
+
4
+ # Parameters
5
+ nc: 1000 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 1024]
11
+ l: [1.00, 1.00, 1024]
12
+ x: [1.00, 1.25, 1024]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+
27
+ # YOLOv8.0n head
28
+ head:
29
+ - [-1, 1, Classify, [nc]] # Classify
doclayout_yolo/cfg/models/v8/yolov8-ghost-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p2 summary: 491 layers, 2033944 parameters, 2033928 gradients, 13.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p2 summary: 491 layers, 5562080 parameters, 5562064 gradients, 25.1 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p2 summary: 731 layers, 9031728 parameters, 9031712 gradients, 42.8 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p2 summary: 971 layers, 12214448 parameters, 12214432 gradients, 69.1 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p2 summary: 971 layers, 18664776 parameters, 18664760 gradients, 103.3 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-ghost-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C3Ghost, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C3Ghost, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, GhostConv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C3Ghost, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, GhostConv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C3Ghost, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, GhostConv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C3Ghost, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-ghost-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost-p6 summary: 529 layers, 2901100 parameters, 2901084 gradients, 5.8 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost-p6 summary: 529 layers, 9520008 parameters, 9519992 gradients, 16.4 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost-p6 summary: 789 layers, 18002904 parameters, 18002888 gradients, 34.4 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost-p6 summary: 1049 layers, 21227584 parameters, 21227568 gradients, 55.3 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost-p6 summary: 1049 layers, 33057852 parameters, 33057836 gradients, 85.7 GFLOPs
13
+
14
+ # YOLOv8.0-ghost backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C3Ghost, [128, True]]
20
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C3Ghost, [256, True]]
22
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C3Ghost, [512, True]]
24
+ - [-1, 1, GhostConv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C3Ghost, [768, True]]
26
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C3Ghost, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0-ghost-p6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C3Ghost, [768]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C3Ghost, [512]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C3Ghost, [256]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, GhostConv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C3Ghost, [512]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, GhostConv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C3Ghost, [768]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, GhostConv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C3Ghost, [1024]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-ghost.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+ # Employs Ghost convolutions and modules proposed in Huawei's GhostNet in https://arxiv.org/abs/1911.11907v2
4
+
5
+ # Parameters
6
+ nc: 80 # number of classes
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024] # YOLOv8n-ghost summary: 403 layers, 1865316 parameters, 1865300 gradients, 5.8 GFLOPs
10
+ s: [0.33, 0.50, 1024] # YOLOv8s-ghost summary: 403 layers, 5960072 parameters, 5960056 gradients, 16.4 GFLOPs
11
+ m: [0.67, 0.75, 768] # YOLOv8m-ghost summary: 603 layers, 10336312 parameters, 10336296 gradients, 32.7 GFLOPs
12
+ l: [1.00, 1.00, 512] # YOLOv8l-ghost summary: 803 layers, 14277872 parameters, 14277856 gradients, 53.7 GFLOPs
13
+ x: [1.00, 1.25, 512] # YOLOv8x-ghost summary: 803 layers, 22229308 parameters, 22229292 gradients, 83.3 GFLOPs
14
+
15
+ # YOLOv8.0n-ghost backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, GhostConv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C3Ghost, [128, True]]
21
+ - [-1, 1, GhostConv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C3Ghost, [256, True]]
23
+ - [-1, 1, GhostConv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C3Ghost, [512, True]]
25
+ - [-1, 1, GhostConv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C3Ghost, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C3Ghost, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C3Ghost, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, GhostConv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C3Ghost, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, GhostConv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C3Ghost, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-obb.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 Oriented Bounding Boxes (OBB) model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, OBB, [nc, 1]] # OBB(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-p2.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P2-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0-p2 head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
39
+ - [[-1, 2], 1, Concat, [1]] # cat backbone P2
40
+ - [-1, 3, C2f, [128]] # 18 (P2/4-xsmall)
41
+
42
+ - [-1, 1, Conv, [128, 3, 2]]
43
+ - [[-1, 15], 1, Concat, [1]] # cat head P3
44
+ - [-1, 3, C2f, [256]] # 21 (P3/8-small)
45
+
46
+ - [-1, 1, Conv, [256, 3, 2]]
47
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
48
+ - [-1, 3, C2f, [512]] # 24 (P4/16-medium)
49
+
50
+ - [-1, 1, Conv, [512, 3, 2]]
51
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
52
+ - [-1, 3, C2f, [1024]] # 27 (P5/32-large)
53
+
54
+ - [[18, 21, 24, 27], 1, Detect, [nc]] # Detect(P2, P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P6 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Detect, [nc]] # Detect(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-pose-p6.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose-p6 keypoints/pose estimation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-p6.yaml' will call yolov8-p6.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0x6 backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [768, True]]
27
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
28
+ - [-1, 3, C2f, [1024, True]]
29
+ - [-1, 1, SPPF, [1024, 5]] # 11
30
+
31
+ # YOLOv8.0x6 head
32
+ head:
33
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
34
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
35
+ - [-1, 3, C2, [768, False]] # 14
36
+
37
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
38
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
39
+ - [-1, 3, C2, [512, False]] # 17
40
+
41
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
42
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
43
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
44
+
45
+ - [-1, 1, Conv, [256, 3, 2]]
46
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
47
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
48
+
49
+ - [-1, 1, Conv, [512, 3, 2]]
50
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
51
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
52
+
53
+ - [-1, 1, Conv, [768, 3, 2]]
54
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
55
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
56
+
57
+ - [[20, 23, 26, 29], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-pose.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-pose keypoints/pose estimation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/pose
3
+
4
+ # Parameters
5
+ nc: 1 # number of classes
6
+ kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
7
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-pose.yaml' will call yolov8-pose.yaml with scale 'n'
8
+ # [depth, width, max_channels]
9
+ n: [0.33, 0.25, 1024]
10
+ s: [0.33, 0.50, 1024]
11
+ m: [0.67, 0.75, 768]
12
+ l: [1.00, 1.00, 512]
13
+ x: [1.00, 1.25, 512]
14
+
15
+ # YOLOv8.0n backbone
16
+ backbone:
17
+ # [from, repeats, module, args]
18
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
19
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
20
+ - [-1, 3, C2f, [128, True]]
21
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
22
+ - [-1, 6, C2f, [256, True]]
23
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
24
+ - [-1, 6, C2f, [512, True]]
25
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
26
+ - [-1, 3, C2f, [1024, True]]
27
+ - [-1, 1, SPPF, [1024, 5]] # 9
28
+
29
+ # YOLOv8.0n head
30
+ head:
31
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
32
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
33
+ - [-1, 3, C2f, [512]] # 12
34
+
35
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
36
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
37
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
38
+
39
+ - [-1, 1, Conv, [256, 3, 2]]
40
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
41
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
42
+
43
+ - [-1, 1, Conv, [512, 3, 2]]
44
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
45
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
46
+
47
+ - [[15, 18, 21], 1, Pose, [nc, kpt_shape]] # Pose(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-rtdetr.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-seg-p6.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg-p6 instance segmentation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg-p6.yaml' will call yolov8-seg-p6.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0x6 backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [768, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [768, True]]
26
+ - [-1, 1, Conv, [1024, 3, 2]] # 9-P6/64
27
+ - [-1, 3, C2f, [1024, True]]
28
+ - [-1, 1, SPPF, [1024, 5]] # 11
29
+
30
+ # YOLOv8.0x6 head
31
+ head:
32
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
33
+ - [[-1, 8], 1, Concat, [1]] # cat backbone P5
34
+ - [-1, 3, C2, [768, False]] # 14
35
+
36
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
37
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
38
+ - [-1, 3, C2, [512, False]] # 17
39
+
40
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
41
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
42
+ - [-1, 3, C2, [256, False]] # 20 (P3/8-small)
43
+
44
+ - [-1, 1, Conv, [256, 3, 2]]
45
+ - [[-1, 17], 1, Concat, [1]] # cat head P4
46
+ - [-1, 3, C2, [512, False]] # 23 (P4/16-medium)
47
+
48
+ - [-1, 1, Conv, [512, 3, 2]]
49
+ - [[-1, 14], 1, Concat, [1]] # cat head P5
50
+ - [-1, 3, C2, [768, False]] # 26 (P5/32-large)
51
+
52
+ - [-1, 1, Conv, [768, 3, 2]]
53
+ - [[-1, 11], 1, Concat, [1]] # cat head P6
54
+ - [-1, 3, C2, [1024, False]] # 29 (P6/64-xlarge)
55
+
56
+ - [[20, 23, 26, 29], 1, Segment, [nc, 32, 256]] # Pose(P3, P4, P5, P6)
doclayout_yolo/cfg/models/v8/yolov8-seg.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-seg instance segmentation model. For Usage examples see https://docs.doclayout_yolo.com/tasks/segment
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n-seg.yaml' will call yolov8-seg.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024]
9
+ s: [0.33, 0.50, 1024]
10
+ m: [0.67, 0.75, 768]
11
+ l: [1.00, 1.00, 512]
12
+ x: [1.00, 1.25, 512]
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-world.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-World object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
37
+
38
+ - [[15, 12, 9], 1, ImagePoolingAttn, [256]] # 16 (P3/8-small)
39
+
40
+ - [15, 1, Conv, [256, 3, 2]]
41
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
42
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 19 (P4/16-medium)
43
+
44
+ - [-1, 1, Conv, [512, 3, 2]]
45
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
46
+ - [-1, 3, C2fAttn, [1024, 512, 16]] # 22 (P5/32-large)
47
+
48
+ - [[15, 19, 22], 1, WorldDetect, [nc, 512, False]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8-worldv2.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8-World-v2 object detection model with P3-P5 outputs. For details see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2fAttn, [256, 128, 4]] # 15 (P3/8-small)
37
+
38
+ - [15, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2fAttn, [512, 256, 8]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2fAttn, [1024, 512, 16]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, WorldDetect, [nc, 512, True]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v8/yolov8.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # YOLOv8 object detection model with P3-P5 outputs. For Usage examples see https://docs.doclayout_yolo.com/tasks/detect
3
+
4
+ # Parameters
5
+ nc: 80 # number of classes
6
+ scales: # model compound scaling constants, i.e. 'model=yolov8n.yaml' will call yolov8.yaml with scale 'n'
7
+ # [depth, width, max_channels]
8
+ n: [0.33, 0.25, 1024] # YOLOv8n summary: 225 layers, 3157200 parameters, 3157184 gradients, 8.9 GFLOPs
9
+ s: [0.33, 0.50, 1024] # YOLOv8s summary: 225 layers, 11166560 parameters, 11166544 gradients, 28.8 GFLOPs
10
+ m: [0.67, 0.75, 768] # YOLOv8m summary: 295 layers, 25902640 parameters, 25902624 gradients, 79.3 GFLOPs
11
+ l: [1.00, 1.00, 512] # YOLOv8l summary: 365 layers, 43691520 parameters, 43691504 gradients, 165.7 GFLOPs
12
+ x: [1.00, 1.25, 512] # YOLOv8x summary: 365 layers, 68229648 parameters, 68229632 gradients, 258.5 GFLOPs
13
+
14
+ # YOLOv8.0n backbone
15
+ backbone:
16
+ # [from, repeats, module, args]
17
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
18
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
19
+ - [-1, 3, C2f, [128, True]]
20
+ - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
21
+ - [-1, 6, C2f, [256, True]]
22
+ - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
23
+ - [-1, 6, C2f, [512, True]]
24
+ - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
25
+ - [-1, 3, C2f, [1024, True]]
26
+ - [-1, 1, SPPF, [1024, 5]] # 9
27
+
28
+ # YOLOv8.0n head
29
+ head:
30
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
31
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
32
+ - [-1, 3, C2f, [512]] # 12
33
+
34
+ - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
35
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
36
+ - [-1, 3, C2f, [256]] # 15 (P3/8-small)
37
+
38
+ - [-1, 1, Conv, [256, 3, 2]]
39
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
40
+ - [-1, 3, C2f, [512]] # 18 (P4/16-medium)
41
+
42
+ - [-1, 1, Conv, [512, 3, 2]]
43
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
44
+ - [-1, 3, C2f, [1024]] # 21 (P5/32-large)
45
+
46
+ - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/models/v9/yolov9c.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+
6
+ # gelan backbone
7
+ backbone:
8
+ - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
9
+ - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
10
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]] # 2
11
+ - [-1, 1, ADown, [256]] # 3-P3/8
12
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]] # 4
13
+ - [-1, 1, ADown, [512]] # 5-P4/16
14
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 6
15
+ - [-1, 1, ADown, [512]] # 7-P5/32
16
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 8
17
+ - [-1, 1, SPPELAN, [512, 256]] # 9
18
+
19
+ head:
20
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
21
+ - [[-1, 6], 1, Concat, [1]] # cat backbone P4
22
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 12
23
+
24
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
25
+ - [[-1, 4], 1, Concat, [1]] # cat backbone P3
26
+ - [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]] # 15 (P3/8-small)
27
+
28
+ - [-1, 1, ADown, [256]]
29
+ - [[-1, 12], 1, Concat, [1]] # cat head P4
30
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 18 (P4/16-medium)
31
+
32
+ - [-1, 1, ADown, [512]]
33
+ - [[-1, 9], 1, Concat, [1]] # cat head P5
34
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)
35
+
36
+ - [[15, 18, 21], 1, Detect, [nc]] # DDetect(P3, P4, P5)
doclayout_yolo/cfg/models/v9/yolov9e.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv9
2
+
3
+ # parameters
4
+ nc: 80 # number of classes
5
+
6
+ # gelan backbone
7
+ backbone:
8
+ - [-1, 1, Silence, []]
9
+ - [-1, 1, Conv, [64, 3, 2]] # 1-P1/2
10
+ - [-1, 1, Conv, [128, 3, 2]] # 2-P2/4
11
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 3
12
+ - [-1, 1, ADown, [256]] # 4-P3/8
13
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 5
14
+ - [-1, 1, ADown, [512]] # 6-P4/16
15
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
16
+ - [-1, 1, ADown, [1024]] # 8-P5/32
17
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9
18
+
19
+ - [1, 1, CBLinear, [[64]]] # 10
20
+ - [3, 1, CBLinear, [[64, 128]]] # 11
21
+ - [5, 1, CBLinear, [[64, 128, 256]]] # 12
22
+ - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
23
+ - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14
24
+
25
+ - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
26
+ - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
27
+ - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
28
+ - [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]] # 18
29
+ - [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]] # 19
30
+ - [-1, 1, ADown, [256]] # 20-P3/8
31
+ - [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]] # 21
32
+ - [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]] # 22
33
+ - [-1, 1, ADown, [512]] # 23-P4/16
34
+ - [[13, 14, -1], 1, CBFuse, [[3, 3]]] # 24
35
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 25
36
+ - [-1, 1, ADown, [1024]] # 26-P5/32
37
+ - [[14, -1], 1, CBFuse, [[4]]] # 27
38
+ - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 28
39
+ - [-1, 1, SPPELAN, [512, 256]] # 29
40
+
41
+ # gelan head
42
+ head:
43
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
44
+ - [[-1, 25], 1, Concat, [1]] # cat backbone P4
45
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 32
46
+
47
+ - [-1, 1, nn.Upsample, [None, 2, 'nearest']]
48
+ - [[-1, 22], 1, Concat, [1]] # cat backbone P3
49
+ - [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]] # 35 (P3/8-small)
50
+
51
+ - [-1, 1, ADown, [256]]
52
+ - [[-1, 32], 1, Concat, [1]] # cat head P4
53
+ - [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]] # 38 (P4/16-medium)
54
+
55
+ - [-1, 1, ADown, [512]]
56
+ - [[-1, 29], 1, Concat, [1]] # cat head P5
57
+ - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)
58
+
59
+ # detect
60
+ - [[35, 38, 41], 1, Detect, [nc]] # Detect(P3, P4, P5)
doclayout_yolo/cfg/trackers/botsort.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for BoT-SORT tracker https://github.com/NirAharon/BoT-SORT
3
+
4
+ tracker_type: botsort # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
11
+ # mot20: False # for tracker evaluation(not used for now)
12
+
13
+ # BoT-SORT settings
14
+ gmc_method: sparseOptFlow # method of global motion compensation
15
+ # ReID model related thresh (not supported yet)
16
+ proximity_thresh: 0.5
17
+ appearance_thresh: 0.25
18
+ with_reid: False
doclayout_yolo/cfg/trackers/bytetrack.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+ # Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack
3
+
4
+ tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
5
+ track_high_thresh: 0.5 # threshold for the first association
6
+ track_low_thresh: 0.1 # threshold for the second association
7
+ new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
8
+ track_buffer: 30 # buffer to calculate the time when to remove tracks
9
+ match_thresh: 0.8 # threshold for matching tracks
10
+ # min_box_area: 10 # threshold for min box areas(for tracker evaluation, not used for now)
11
+ # mot20: False # for tracker evaluation(not used for now)
doclayout_yolo/data/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from .base import BaseDataset
4
+ from .build import build_dataloader, build_yolo_dataset, load_inference_source
5
+ from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
6
+
7
+ __all__ = (
8
+ "BaseDataset",
9
+ "ClassificationDataset",
10
+ "SemanticDataset",
11
+ "YOLODataset",
12
+ "build_yolo_dataset",
13
+ "build_dataloader",
14
+ "load_inference_source",
15
+ )
doclayout_yolo/data/annotator.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics YOLO 🚀, AGPL-3.0 license
2
+
3
+ from pathlib import Path
4
+
5
+ from doclayout_yolo import SAM, YOLO
6
+
7
+
8
+ def auto_annotate(data, det_model="yolov8x.pt", sam_model="sam_b.pt", device="", output_dir=None):
9
+ """
10
+ Automatically annotates images using a YOLO object detection model and a SAM segmentation model.
11
+
12
+ Args:
13
+ data (str): Path to a folder containing images to be annotated.
14
+ det_model (str, optional): Pre-trained YOLO detection model. Defaults to 'yolov8x.pt'.
15
+ sam_model (str, optional): Pre-trained SAM segmentation model. Defaults to 'sam_b.pt'.
16
+ device (str, optional): Device to run the models on. Defaults to an empty string (CPU or GPU, if available).
17
+ output_dir (str | None | optional): Directory to save the annotated results.
18
+ Defaults to a 'labels' folder in the same directory as 'data'.
19
+
20
+ Example:
21
+ ```python
22
+ from doclayout_yolo.data.annotator import auto_annotate
23
+
24
+ auto_annotate(data='doclayout_yolo/assets', det_model='yolov8n.pt', sam_model='mobile_sam.pt')
25
+ ```
26
+ """
27
+ det_model = YOLO(det_model)
28
+ sam_model = SAM(sam_model)
29
+
30
+ data = Path(data)
31
+ if not output_dir:
32
+ output_dir = data.parent / f"{data.stem}_auto_annotate_labels"
33
+ Path(output_dir).mkdir(exist_ok=True, parents=True)
34
+
35
+ det_results = det_model(data, stream=True, device=device)
36
+
37
+ for result in det_results:
38
+ class_ids = result.boxes.cls.int().tolist() # noqa
39
+ if len(class_ids):
40
+ boxes = result.boxes.xyxy # Boxes object for bbox outputs
41
+ sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
42
+ segments = sam_results[0].masks.xyn # noqa
43
+
44
+ with open(f"{Path(output_dir) / Path(result.path).stem}.txt", "w") as f:
45
+ for i in range(len(segments)):
46
+ s = segments[i]
47
+ if len(s) == 0:
48
+ continue
49
+ segment = map(str, segments[i].reshape(-1).tolist())
50
+ f.write(f"{class_ids[i]} " + " ".join(segment) + "\n")