root16285 committed
Commit 7d6df10 · Parent: 0e1303f

Add complete FastAPI Docker app (model downloaded at build)

Files changed (50)
  1. .gitignore +1 -0
  2. Dockerfile +42 -0
  3. README.md +136 -5
  4. models/__init__.py +1 -0
  5. models/__pycache__/__init__.cpython-312.pyc +0 -0
  6. models/__pycache__/common.cpython-312.pyc +0 -0
  7. models/__pycache__/experimental.cpython-312.pyc +0 -0
  8. models/__pycache__/yolo.cpython-312.pyc +0 -0
  9. models/common.py +1110 -0
  10. models/experimental.py +130 -0
  11. models/hub/anchors.yaml +57 -0
  12. models/hub/yolov3-spp.yaml +52 -0
  13. models/hub/yolov3-tiny.yaml +42 -0
  14. models/hub/yolov3.yaml +52 -0
  15. models/hub/yolov5-bifpn.yaml +49 -0
  16. models/hub/yolov5-fpn.yaml +43 -0
  17. models/hub/yolov5-p2.yaml +55 -0
  18. models/hub/yolov5-p34.yaml +42 -0
  19. models/hub/yolov5-p6.yaml +57 -0
  20. models/hub/yolov5-p7.yaml +68 -0
  21. models/hub/yolov5-panet.yaml +49 -0
  22. models/hub/yolov5l6.yaml +61 -0
  23. models/hub/yolov5m6.yaml +61 -0
  24. models/hub/yolov5n6.yaml +61 -0
  25. models/hub/yolov5s-LeakyReLU.yaml +50 -0
  26. models/hub/yolov5s-ghost.yaml +49 -0
  27. models/hub/yolov5s-transformer.yaml +49 -0
  28. models/hub/yolov5s6.yaml +61 -0
  29. models/hub/yolov5x6.yaml +61 -0
  30. models/segment/yolov5l-seg.yaml +49 -0
  31. models/segment/yolov5m-seg.yaml +49 -0
  32. models/segment/yolov5n-seg.yaml +49 -0
  33. models/segment/yolov5s-seg.yaml +49 -0
  34. models/segment/yolov5x-seg.yaml +49 -0
  35. models/tf.py +775 -0
  36. models/yolo.py +496 -0
  37. models/yolov5l.yaml +49 -0
  38. models/yolov5m.yaml +49 -0
  39. models/yolov5n.yaml +49 -0
  40. models/yolov5s.yaml +49 -0
  41. models/yolov5x.yaml +49 -0
  42. requirements.txt +51 -0
  43. utils/__init__.py +96 -0
  44. utils/__pycache__/__init__.cpython-312.pyc +0 -0
  45. utils/__pycache__/augmentations.cpython-312.pyc +0 -0
  46. utils/__pycache__/autoanchor.cpython-312.pyc +0 -0
  47. utils/__pycache__/dataloaders.cpython-312.pyc +0 -0
  48. utils/__pycache__/downloads.cpython-312.pyc +0 -0
  49. utils/__pycache__/general.cpython-312.pyc +0 -0
  50. utils/__pycache__/metrics.cpython-312.pyc +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ yolov5s.pt
Dockerfile ADDED
@@ -0,0 +1,42 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     libgl1-mesa-glx \
+     libglib2.0-0 \
+     libsm6 \
+     libxext6 \
+     libxrender-dev \
+     libgomp1 \
+     wget \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements and install Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt && \
+     pip install --no-cache-dir fastapi uvicorn[standard]==0.32.0 python-multipart websockets
+
+ # Copy the entire application
+ COPY . /app
+
+ # Download YOLOv5 model if not present
+ RUN if [ ! -f yolov5s.pt ]; then \
+     wget -q https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt; \
+     fi
+
+ # Create user for security
+ RUN useradd -m -u 1000 user && chown -R user:user /app
+ USER user
+
+ # Set environment variables
+ ENV PATH="/home/user/.local/bin:$PATH"
+ ENV PORT=7860
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Start the FastAPI application
+ CMD ["sh", "-c", "cd /app/webapp/backend && uvicorn main:app --host 0.0.0.0 --port 7860"]
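A minimal sketch for building and testing this image locally (the `zka-detection` tag is illustrative, not part of the repo):

```bash
# Build the image from the repo root
docker build -t zka-detection .

# Run it, mapping the Spaces port to localhost
docker run --rm -p 7860:7860 zka-detection
# Then open http://localhost:7860
```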
README.md CHANGED
@@ -1,11 +1,142 @@
  ---
- title: Zka Detection Full
- emoji: 🏢
- colorFrom: pink
- colorTo: indigo
+ title: ZKA Marchés CI - Complete Interface
+ emoji: 🚦
+ colorFrom: blue
+ colorTo: purple
  sdk: docker
+ app_port: 7860
  pinned: false
  license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # 🚀 ZKA Marchés CI - Object Detection (Complete Interface)
+
+ ## 📋 Description
+
+ A complete real-time object detection application built on **YOLOv5** for intelligent crowd-flow management in Abidjan's markets.
+
+ ## ✨ Features
+
+ ### 🎥 Live Webcam
+ - Real-time detection over WebSocket
+ - Live FPS and latency display
+ - Dynamic confidence-threshold adjustment
+ - On-the-fly switching between YOLOv5 models (s/m/l)
+
+ ### 📤 Image Upload
+ - Multi-file upload with drag & drop
+ - Batch processing
+ - Results rendered with colored bounding boxes
+ - Detailed per-image statistics
+
+ ### 📊 Interactive Dashboard
+ - Real-time global statistics
+ - Interactive charts with Chart.js
+ - Most frequently detected objects
+ - Model performance metrics
+
+ ### 💾 History
+ - All detections are saved
+ - Browse the complete detection history
+ - Option to clear the history
+
+ ### 🎨 Modern Interface
+ - Responsive, mobile-friendly design
+ - Dark/light mode
+ - Smooth animations
+ - TailwindCSS + Font Awesome
+
+ ## 🛠️ Technologies
+
+ - **Backend**: FastAPI + WebSocket
+ - **Frontend**: HTML5 + vanilla JavaScript + TailwindCSS
+ - **AI**: YOLOv5 (Ultralytics)
+ - **Visualization**: Chart.js
+ - **Deployment**: Docker on Hugging Face Spaces
+
+ ## 🎯 Usage
+
+ ### Live Webcam
+ 1. Click the "Live Webcam" tab
+ 2. Click "Start"
+ 3. Allow access to your webcam
+ 4. Detection then runs automatically in real time
+
+ ### Image Upload
+ 1. Click the "Upload Images" tab
+ 2. Drag your images into the drop zone or click to select them
+ 3. Click "Analyze images"
+ 4. Review the results with bounding boxes
+
+ ### Dashboard
+ - View global statistics
+ - Visualize performance charts
+ - Analyze detection trends
+
+ ### History
+ - Find all your previous detections
+ - View the details of each detection
+
+ ## 📊 Detected Classes
+
+ The application detects the **80 object classes** of the COCO dataset, including:
+
+ **🚶 People & Mobility**
+ - People, vehicles (cars, motorcycles, buses, trucks)
+ - Bicycles, scooters
+
+ **🏪 Infrastructure & Commerce**
+ - Street furniture (benches, tables, chairs)
+ - Market goods
+ - Signage
+
+ ## 🎓 Academic Context
+
+ A project developed at **ESATIC** (École Supérieure Africaine des TIC) for urban management in Africa.
+
+ **Target markets:**
+ - Adjamé (Abidjan)
+ - Treichville (Abidjan)
+ - Cocody (Abidjan)
+ - Yopougon (Abidjan)
+
+ ## 🔧 Configuration
+
+ The application listens on **port 7860** (required by Hugging Face Spaces).
+
+ ## 📱 Compatibility
+
+ - ✅ Desktop (Chrome, Firefox, Edge, Safari)
+ - ✅ Mobile (iOS, Android)
+ - ✅ Tablet
+
+ ## 📝 API Endpoints
+
+ - `GET /` - Main interface
+ - `POST /detect` - Run detection on a single image
+ - `POST /detect/batch` - Run detection on multiple images
+ - `WS /ws` - WebSocket for real-time webcam detection
+ - `GET /statistics` - Global statistics
+ - `GET /history` - Detection history
+ - `GET /docs` - Interactive API documentation
+
+ ## 🚀 Running Locally
+
+ ```bash
+ # Install dependencies
+ pip install -r requirements.txt
+
+ # Start the server
+ cd webapp/backend
+ python main.py
+ ```
+
+ Open: http://localhost:8001
+
+ ## 📄 License
+
+ MIT License
+
+ ---
+
+ **Built with ❤️ for intelligent management of African markets**
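A quick sketch of exercising the single-image endpoint once the local server is up; the multipart field name `file` is an assumption — check the generated `/docs` page for the actual request schema:

```bash
# Assumes the server from "Running Locally" is listening on port 8001
# and that /detect accepts a multipart image field named "file" (assumption)
curl -X POST "http://localhost:8001/detect" \
  -F "file=@sample.jpg"
```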
models/__init__.py ADDED
@@ -0,0 +1 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (171 Bytes)
models/__pycache__/common.cpython-312.pyc ADDED
Binary file (79.1 kB)
models/__pycache__/experimental.cpython-312.pyc ADDED
Binary file (8.95 kB)
models/__pycache__/yolo.cpython-312.pyc ADDED
Binary file (33.3 kB)
models/common.py ADDED
@@ -0,0 +1,1110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """Common modules."""
3
+
4
+ import ast
5
+ import contextlib
6
+ import json
7
+ import math
8
+ import platform
9
+ import warnings
10
+ import zipfile
11
+ from collections import OrderedDict, namedtuple
12
+ from copy import copy
13
+ from pathlib import Path
14
+ from urllib.parse import urlparse
15
+
16
+ import cv2
17
+ import numpy as np
18
+ import pandas as pd
19
+ import requests
20
+ import torch
21
+ import torch.nn as nn
22
+ from PIL import Image
23
+ from torch.cuda import amp
24
+
25
+ # Import 'ultralytics' package or install if missing
26
+ try:
27
+ import ultralytics
28
+
29
+ assert hasattr(ultralytics, "__version__") # verify package is not directory
30
+ except (ImportError, AssertionError):
31
+ import os
32
+
33
+ os.system("pip install -U ultralytics")
34
+ import ultralytics
35
+
36
+ from ultralytics.utils.plotting import Annotator, colors, save_one_box
37
+
38
+ from utils import TryExcept
39
+ from utils.dataloaders import exif_transpose, letterbox
40
+ from utils.general import (
41
+ LOGGER,
42
+ ROOT,
43
+ Profile,
44
+ check_requirements,
45
+ check_suffix,
46
+ check_version,
47
+ colorstr,
48
+ increment_path,
49
+ is_jupyter,
50
+ make_divisible,
51
+ non_max_suppression,
52
+ scale_boxes,
53
+ xywh2xyxy,
54
+ xyxy2xywh,
55
+ yaml_load,
56
+ )
57
+ from utils.torch_utils import copy_attr, smart_inference_mode
58
+
59
+
60
+ def autopad(k, p=None, d=1):
61
+ """Pads kernel to 'same' output shape, adjusting for optional dilation; returns padding size.
62
+
63
+ `k`: kernel, `p`: padding, `d`: dilation.
64
+ """
65
+ if d > 1:
66
+ k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
67
+ if p is None:
68
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
69
+ return p
70
+
71
+
72
+ class Conv(nn.Module):
73
+ """Applies a convolution, batch normalization, and activation function to an input tensor in a neural network."""
74
+
75
+ default_act = nn.SiLU() # default activation
76
+
77
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
78
+ """Initializes a standard convolution layer with optional batch normalization and activation."""
79
+ super().__init__()
80
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
81
+ self.bn = nn.BatchNorm2d(c2)
82
+ self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
83
+
84
+ def forward(self, x):
85
+ """Applies a convolution followed by batch normalization and an activation function to the input tensor `x`."""
86
+ return self.act(self.bn(self.conv(x)))
87
+
88
+ def forward_fuse(self, x):
89
+ """Applies a fused convolution and activation function to the input tensor `x`."""
90
+ return self.act(self.conv(x))
91
+
92
+
93
+ class DWConv(Conv):
94
+ """Implements a depth-wise convolution layer with optional activation for efficient spatial filtering."""
95
+
96
+ def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
97
+ """Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
98
+ channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
99
+ """
100
+ super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
101
+
102
+
103
+ class DWConvTranspose2d(nn.ConvTranspose2d):
104
+ """A depth-wise transpose convolutional layer for upsampling in neural networks, particularly in YOLOv5 models."""
105
+
106
+ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
107
+ """Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output channels
108
+ (c2), kernel size (k), stride (s), input padding (p1), output padding (p2).
109
+ """
110
+ super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
111
+
112
+
113
+ class TransformerLayer(nn.Module):
114
+ """Transformer layer with multihead attention and linear layers, optimized by removing LayerNorm."""
115
+
116
+ def __init__(self, c, num_heads):
117
+ """Initializes a transformer layer, sans LayerNorm for performance, with multihead attention and linear layers.
118
+
119
+ See as described in https://arxiv.org/abs/2010.11929.
120
+ """
121
+ super().__init__()
122
+ self.q = nn.Linear(c, c, bias=False)
123
+ self.k = nn.Linear(c, c, bias=False)
124
+ self.v = nn.Linear(c, c, bias=False)
125
+ self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
126
+ self.fc1 = nn.Linear(c, c, bias=False)
127
+ self.fc2 = nn.Linear(c, c, bias=False)
128
+
129
+ def forward(self, x):
130
+ """Performs forward pass using MultiheadAttention and two linear transformations with residual connections."""
131
+ x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
132
+ x = self.fc2(self.fc1(x)) + x
133
+ return x
134
+
135
+
136
+ class TransformerBlock(nn.Module):
137
+ """A Transformer block for vision tasks with convolution, position embeddings, and Transformer layers."""
138
+
139
+ def __init__(self, c1, c2, num_heads, num_layers):
140
+ """Initializes a Transformer block for vision tasks, adapting dimensions if necessary and stacking specified
141
+ layers.
142
+ """
143
+ super().__init__()
144
+ self.conv = None
145
+ if c1 != c2:
146
+ self.conv = Conv(c1, c2)
147
+ self.linear = nn.Linear(c2, c2) # learnable position embedding
148
+ self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
149
+ self.c2 = c2
150
+
151
+ def forward(self, x):
152
+ """Processes input through an optional convolution, followed by Transformer layers and position embeddings for
153
+ object detection.
154
+ """
155
+ if self.conv is not None:
156
+ x = self.conv(x)
157
+ b, _, w, h = x.shape
158
+ p = x.flatten(2).permute(2, 0, 1)
159
+ return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
160
+
161
+
162
+ class Bottleneck(nn.Module):
163
+ """A bottleneck layer with optional shortcut and group convolution for efficient feature extraction."""
164
+
165
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
166
+ """Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
167
+ expansion.
168
+ """
169
+ super().__init__()
170
+ c_ = int(c2 * e) # hidden channels
171
+ self.cv1 = Conv(c1, c_, 1, 1)
172
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
173
+ self.add = shortcut and c1 == c2
174
+
175
+ def forward(self, x):
176
+ """Processes input through two convolutions, optionally adds shortcut if channel dimensions match; input is a
177
+ tensor.
178
+ """
179
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
180
+
181
+
182
+ class BottleneckCSP(nn.Module):
183
+ """CSP bottleneck layer for feature extraction with cross-stage partial connections and optional shortcuts."""
184
+
185
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
186
+ """Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool,
187
+ groups, expansion.
188
+ """
189
+ super().__init__()
190
+ c_ = int(c2 * e) # hidden channels
191
+ self.cv1 = Conv(c1, c_, 1, 1)
192
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
193
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
194
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
195
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
196
+ self.act = nn.SiLU()
197
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
198
+
199
+ def forward(self, x):
200
+ """Performs forward pass by applying layers, activation, and concatenation on input x, returning feature-
201
+ enhanced output.
202
+ """
203
+ y1 = self.cv3(self.m(self.cv1(x)))
204
+ y2 = self.cv2(x)
205
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
206
+
207
+
208
+ class CrossConv(nn.Module):
209
+ """Implements a cross convolution layer with downsampling, expansion, and optional shortcut."""
210
+
211
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
212
+ """Initializes CrossConv with downsampling, expanding, and optionally shortcutting; `c1` input, `c2` output
213
+ channels.
214
+
215
+ Inputs are ch_in, ch_out, kernel, stride, groups, expansion, shortcut.
216
+ """
217
+ super().__init__()
218
+ c_ = int(c2 * e) # hidden channels
219
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
220
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
221
+ self.add = shortcut and c1 == c2
222
+
223
+ def forward(self, x):
224
+ """Performs feature sampling, expanding, and applies shortcut if channels match; expects `x` input tensor."""
225
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
226
+
227
+
228
+ class C3(nn.Module):
229
+ """Implements a CSP Bottleneck module with three convolutions for enhanced feature extraction in neural networks."""
230
+
231
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
232
+ """Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
233
+ convolutions, and expansion.
234
+ """
235
+ super().__init__()
236
+ c_ = int(c2 * e) # hidden channels
237
+ self.cv1 = Conv(c1, c_, 1, 1)
238
+ self.cv2 = Conv(c1, c_, 1, 1)
239
+ self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
240
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
241
+
242
+ def forward(self, x):
243
+ """Performs forward propagation using concatenated outputs from two convolutions and a Bottleneck sequence."""
244
+ return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
245
+
246
+
247
+ class C3x(C3):
248
+ """Extends the C3 module with cross-convolutions for enhanced feature extraction in neural networks."""
249
+
250
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
251
+ """Initializes C3x module with cross-convolutions, extending C3 with customizable channel dimensions, groups,
252
+ and expansion.
253
+ """
254
+ super().__init__(c1, c2, n, shortcut, g, e)
255
+ c_ = int(c2 * e)
256
+ self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
257
+
258
+
259
+ class C3TR(C3):
260
+ """C3 module with TransformerBlock for enhanced feature extraction in object detection models."""
261
+
262
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
263
+ """Initializes C3 module with TransformerBlock for enhanced feature extraction, accepts channel sizes, shortcut
264
+ config, group, and expansion.
265
+ """
266
+ super().__init__(c1, c2, n, shortcut, g, e)
267
+ c_ = int(c2 * e)
268
+ self.m = TransformerBlock(c_, c_, 4, n)
269
+
270
+
271
+ class C3SPP(C3):
272
+ """Extends the C3 module with an SPP layer for enhanced spatial feature extraction and customizable channels."""
273
+
274
+ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
275
+ """Initializes a C3 module with SPP layer for advanced spatial feature extraction, given channel sizes, kernel
276
+ sizes, shortcut, group, and expansion ratio.
277
+ """
278
+ super().__init__(c1, c2, n, shortcut, g, e)
279
+ c_ = int(c2 * e)
280
+ self.m = SPP(c_, c_, k)
281
+
282
+
283
+ class C3Ghost(C3):
284
+ """Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in YOLOv5."""
285
+
286
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
287
+ """Initializes YOLOv5's C3 module with Ghost Bottlenecks for efficient feature extraction."""
288
+ super().__init__(c1, c2, n, shortcut, g, e)
289
+ c_ = int(c2 * e) # hidden channels
290
+ self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
291
+
292
+
293
+ class SPP(nn.Module):
294
+ """Implements Spatial Pyramid Pooling (SPP) for feature extraction, ref: https://arxiv.org/abs/1406.4729."""
295
+
296
+ def __init__(self, c1, c2, k=(5, 9, 13)):
297
+ """Initializes SPP layer with Spatial Pyramid Pooling, ref: https://arxiv.org/abs/1406.4729, args: c1 (input
298
+ channels), c2 (output channels), k (kernel sizes).
299
+ """
300
+ super().__init__()
301
+ c_ = c1 // 2 # hidden channels
302
+ self.cv1 = Conv(c1, c_, 1, 1)
303
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
304
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
305
+
306
+ def forward(self, x):
307
+ """Applies convolution and max pooling layers to the input tensor `x`, concatenates results, and returns output
308
+ tensor.
309
+ """
310
+ x = self.cv1(x)
311
+ with warnings.catch_warnings():
312
+ warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
313
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
314
+
315
+
316
+ class SPPF(nn.Module):
317
+ """Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv5 models."""
318
+
319
+ def __init__(self, c1, c2, k=5):
320
+ """Initializes YOLOv5 SPPF layer with given channels and kernel size for YOLOv5 model, combining convolution and
321
+ max pooling.
322
+
323
+ Equivalent to SPP(k=(5, 9, 13)).
324
+ """
325
+ super().__init__()
326
+ c_ = c1 // 2 # hidden channels
327
+ self.cv1 = Conv(c1, c_, 1, 1)
328
+ self.cv2 = Conv(c_ * 4, c2, 1, 1)
329
+ self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
330
+
331
+ def forward(self, x):
332
+ """Processes input through a series of convolutions and max pooling operations for feature extraction."""
333
+ x = self.cv1(x)
334
+ with warnings.catch_warnings():
335
+ warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
336
+ y1 = self.m(x)
337
+ y2 = self.m(y1)
338
+ return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
339
+
340
+
341
+ class Focus(nn.Module):
342
+ """Focuses spatial information into channel space using slicing and convolution for efficient feature extraction."""
343
+
344
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
345
+ """Initializes Focus module to concentrate width-height info into channel space with configurable convolution
346
+ parameters.
347
+ """
348
+ super().__init__()
349
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
350
+ # self.contract = Contract(gain=2)
351
+
352
+ def forward(self, x):
353
+ """Processes input through Focus mechanism, reshaping (b,c,w,h) to (b,4c,w/2,h/2) then applies convolution."""
354
+ return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
355
+ # return self.conv(self.contract(x))
356
+
357
+
358
+ class GhostConv(nn.Module):
359
+ """Implements Ghost Convolution for efficient feature extraction, see https://github.com/huawei-noah/ghostnet."""
360
+
361
+ def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
362
+ """Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels
363
+ for efficiency.
364
+ """
365
+ super().__init__()
366
+ c_ = c2 // 2 # hidden channels
367
+ self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
368
+ self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
369
+
370
+ def forward(self, x):
371
+ """Performs forward pass, concatenating outputs of two convolutions on input `x`: shape (B,C,H,W)."""
372
+ y = self.cv1(x)
373
+ return torch.cat((y, self.cv2(y)), 1)
374
+
375
+
376
+ class GhostBottleneck(nn.Module):
377
+ """Efficient bottleneck layer using Ghost Convolutions, see https://github.com/huawei-noah/ghostnet."""
378
+
379
+ def __init__(self, c1, c2, k=3, s=1):
380
+ """Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see
381
+ https://github.com/huawei-noah/ghostnet.
382
+ """
383
+ super().__init__()
384
+ c_ = c2 // 2
385
+ self.conv = nn.Sequential(
386
+ GhostConv(c1, c_, 1, 1), # pw
387
+ DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
388
+ GhostConv(c_, c2, 1, 1, act=False),
389
+ ) # pw-linear
390
+ self.shortcut = (
391
+ nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
392
+ )
393
+
394
+ def forward(self, x):
395
+ """Processes input through conv and shortcut layers, returning their summed output."""
396
+ return self.conv(x) + self.shortcut(x)
397
+
398
+
399
+ class Contract(nn.Module):
400
+ """Contracts spatial dimensions into channel dimensions for efficient processing in neural networks."""
401
+
402
+ def __init__(self, gain=2):
403
+ """Initializes a layer to contract spatial dimensions (width-height) into channels, e.g., input shape
404
+ (1,64,80,80) to (1,256,40,40).
405
+ """
406
+ super().__init__()
407
+ self.gain = gain
408
+
409
+ def forward(self, x):
410
+ """Processes input tensor to expand channel dimensions by contracting spatial dimensions, yielding output shape
411
+ `(b, c*s*s, h//s, w//s)`.
412
+ """
413
+ b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
414
+ s = self.gain
415
+ x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
416
+ x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
417
+ return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
418
+
419
+
420
+ class Expand(nn.Module):
421
+ """Expands spatial dimensions by redistributing channels, e.g., from (1,64,80,80) to (1,16,160,160)."""
422
+
423
+ def __init__(self, gain=2):
424
+ """Initializes the Expand module to increase spatial dimensions by redistributing channels, with an optional
425
+ gain factor.
426
+
427
+ Example: x(1,64,80,80) to x(1,16,160,160).
428
+ """
429
+ super().__init__()
430
+ self.gain = gain
431
+
432
+ def forward(self, x):
433
+ """Processes input tensor x to expand spatial dims by redistributing channels, requiring C / gain^2 == 0."""
434
+ b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
435
+ s = self.gain
436
+ x = x.view(b, s, s, c // s**2, h, w) # x(1,2,2,16,80,80)
437
+ x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
438
+ return x.view(b, c // s**2, h * s, w * s) # x(1,16,160,160)
439
+
440
+
441
+ class Concat(nn.Module):
442
+ """Concatenates tensors along a specified dimension for efficient tensor manipulation in neural networks."""
443
+
444
+ def __init__(self, dimension=1):
445
+ """Initializes a Concat module to concatenate tensors along a specified dimension."""
446
+ super().__init__()
447
+ self.d = dimension
448
+
449
+ def forward(self, x):
450
+ """Concatenates a list of tensors along a specified dims; `x` is a list of tensors, `dimension` is an int."""
451
+ return torch.cat(x, self.d)
452
+
453
+
454
+ class DetectMultiBackend(nn.Module):
455
+ """YOLOv5 MultiBackend class for inference on various backends including PyTorch, ONNX, TensorRT, and more."""
456
+
457
+ def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
458
+ """Initializes DetectMultiBackend with support for various inference backends, including PyTorch and ONNX."""
459
+ # PyTorch: weights = *.pt
460
+ # TorchScript: *.torchscript
461
+ # ONNX Runtime: *.onnx
462
+ # ONNX OpenCV DNN: *.onnx --dnn
463
+ # OpenVINO: *_openvino_model
464
+ # CoreML: *.mlpackage
465
+ # TensorRT: *.engine
466
+ # TensorFlow SavedModel: *_saved_model
467
+ # TensorFlow GraphDef: *.pb
468
+ # TensorFlow Lite: *.tflite
469
+ # TensorFlow Edge TPU: *_edgetpu.tflite
470
+ # PaddlePaddle: *_paddle_model
471
+ from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
472
+
473
+ super().__init__()
474
+ w = str(weights[0] if isinstance(weights, list) else weights)
475
+ pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
476
+ fp16 &= pt or jit or onnx or engine or triton # FP16
477
+ nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH)
478
+ stride = 32 # default stride
479
+ cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA
480
+ if not (pt or triton):
481
+ w = attempt_download(w) # download if not local
482
+
483
+ if pt: # PyTorch
484
+ model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
485
+ stride = max(int(model.stride.max()), 32) # model stride
486
+ names = model.module.names if hasattr(model, "module") else model.names # get class names
487
+ model.half() if fp16 else model.float()
488
+ self.model = model # explicitly assign for to(), cpu(), cuda(), half()
489
+ elif jit: # TorchScript
490
+ LOGGER.info(f"Loading {w} for TorchScript inference...")
491
+ extra_files = {"config.txt": ""} # model metadata
492
+ model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
493
+ model.half() if fp16 else model.float()
494
+ if extra_files["config.txt"]: # load metadata dict
495
+ d = json.loads(
496
+ extra_files["config.txt"],
497
+ object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()},
498
+ )
499
+ stride, names = int(d["stride"]), d["names"]
500
+ elif dnn: # ONNX OpenCV DNN
501
+ LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
502
+ check_requirements("opencv-python>=4.5.4")
503
+ net = cv2.dnn.readNetFromONNX(w)
504
+ elif onnx: # ONNX Runtime
505
+ LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
506
+ check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
507
+ import onnxruntime
508
+
509
+ providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
510
+ session = onnxruntime.InferenceSession(w, providers=providers)
511
+ output_names = [x.name for x in session.get_outputs()]
512
+ meta = session.get_modelmeta().custom_metadata_map # metadata
513
+ if "stride" in meta:
514
+ stride, names = int(meta["stride"]), eval(meta["names"])
515
+ elif xml: # OpenVINO
516
+ LOGGER.info(f"Loading {w} for OpenVINO inference...")
517
+ check_requirements("openvino>=2023.0") # requires openvino-dev: https://pypi.org/project/openvino-dev/
518
+ from openvino.runtime import Core, Layout, get_batch
519
+
520
+ core = Core()
521
+ if not Path(w).is_file(): # if not *.xml
522
+ w = next(Path(w).glob("*.xml")) # get *.xml file from *_openvino_model dir
523
+ ov_model = core.read_model(model=w, weights=Path(w).with_suffix(".bin"))
524
+ if ov_model.get_parameters()[0].get_layout().empty:
525
+ ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
526
+ batch_dim = get_batch(ov_model)
527
+ if batch_dim.is_static:
528
+ batch_size = batch_dim.get_length()
529
+ ov_compiled_model = core.compile_model(ov_model, device_name="AUTO") # AUTO selects best available device
530
+ stride, names = self._load_metadata(Path(w).with_suffix(".yaml")) # load metadata
531
+ elif engine: # TensorRT
532
+ LOGGER.info(f"Loading {w} for TensorRT inference...")
533
+ import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
534
+
535
+ check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0
536
+ if device.type == "cpu":
537
+ device = torch.device("cuda:0")
538
+ Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
539
+ logger = trt.Logger(trt.Logger.INFO)
540
+ with open(w, "rb") as f, trt.Runtime(logger) as runtime:
541
+ model = runtime.deserialize_cuda_engine(f.read())
542
+ context = model.create_execution_context()
543
+ bindings = OrderedDict()
544
+ output_names = []
545
+ fp16 = False # default updated below
546
+ dynamic = False
547
+ is_trt10 = not hasattr(model, "num_bindings")
548
+ num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
549
+ for i in num:
550
+ if is_trt10:
551
+ name = model.get_tensor_name(i)
552
+ dtype = trt.nptype(model.get_tensor_dtype(name))
553
+ is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
554
+ if is_input:
555
+ if -1 in tuple(model.get_tensor_shape(name)): # dynamic
556
+ dynamic = True
557
+ context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2]))
558
+ if dtype == np.float16:
559
+ fp16 = True
560
+ else: # output
561
+ output_names.append(name)
562
+ shape = tuple(context.get_tensor_shape(name))
563
+ else:
564
+ name = model.get_binding_name(i)
565
+ dtype = trt.nptype(model.get_binding_dtype(i))
566
+ if model.binding_is_input(i):
567
+ if -1 in tuple(model.get_binding_shape(i)): # dynamic
568
+ dynamic = True
569
+ context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
570
+ if dtype == np.float16:
571
+ fp16 = True
572
+ else: # output
573
+ output_names.append(name)
574
+ shape = tuple(context.get_binding_shape(i))
575
+ im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
576
+ bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
577
+ binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
578
+ batch_size = bindings["images"].shape[0] # if dynamic, this is instead max batch size
579
+ elif coreml: # CoreML
580
+ LOGGER.info(f"Loading {w} for CoreML inference...")
581
+ import coremltools as ct
582
+
583
+ model = ct.models.MLModel(w)
584
+ elif saved_model: # TF SavedModel
585
+ LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
586
+ import tensorflow as tf
587
+
588
+ keras = False # assume TF1 saved_model
589
+ model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
590
+ elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
591
+ LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
592
+ import tensorflow as tf
593
+
594
+ def wrap_frozen_graph(gd, inputs, outputs):
595
+ """Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
596
+ x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
597
+ ge = x.graph.as_graph_element
598
+ return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
599
+
600
+ def gd_outputs(gd):
601
+ """Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
602
+ name_list, input_list = [], []
603
+ for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
604
+ name_list.append(node.name)
605
+ input_list.extend(node.input)
606
+ return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))
607
+
608
+ gd = tf.Graph().as_graph_def() # TF GraphDef
609
+ with open(w, "rb") as f:
610
+ gd.ParseFromString(f.read())
611
+ frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
612
+ elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
613
+ try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
614
+ from tflite_runtime.interpreter import Interpreter, load_delegate
615
+ except ImportError:
616
+ import tensorflow as tf
617
+
618
+ Interpreter, load_delegate = (
619
+ tf.lite.Interpreter,
620
+ tf.lite.experimental.load_delegate,
621
+ )
622
+ if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
623
+ LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
624
+ delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
625
+ platform.system()
626
+ ]
627
+ interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
628
+ else: # TFLite
629
+ LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
630
+ interpreter = Interpreter(model_path=w) # load TFLite model
631
+ interpreter.allocate_tensors() # allocate
632
+ input_details = interpreter.get_input_details() # inputs
633
+ output_details = interpreter.get_output_details() # outputs
634
+ # load metadata
635
+ with contextlib.suppress(zipfile.BadZipFile):
636
+ with zipfile.ZipFile(w, "r") as model:
637
+ meta_file = model.namelist()[0]
638
+ meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
639
+ stride, names = int(meta["stride"]), meta["names"]
640
+ elif tfjs: # TF.js
641
+ raise NotImplementedError("ERROR: YOLOv5 TF.js inference is not supported")
642
+ # PaddlePaddle
643
+ elif paddle:
644
+ LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
645
+ check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle>=3.0.0")
646
+ import paddle.inference as pdi
647
+
648
+ w = Path(w)
649
+ if w.is_dir():
650
+ model_file = next(w.rglob("*.json"), None)
651
+ params_file = next(w.rglob("*.pdiparams"), None)
652
+ elif w.suffix == ".pdiparams":
653
+ model_file = w.with_name("model.json")
654
+ params_file = w
655
+ else:
656
+ raise ValueError(f"Invalid model path {w}. Provide model directory or a .pdiparams file.")
657
+
658
+ if not (model_file and params_file and model_file.is_file() and params_file.is_file()):
659
+ raise FileNotFoundError(f"Model files not found in {w}. Both .json and .pdiparams files are required.")
660
+
661
+ config = pdi.Config(str(model_file), str(params_file))
662
+ if cuda:
663
+ config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
664
+ predictor = pdi.create_predictor(config)
665
+ input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
666
+ output_names = predictor.get_output_names()
667
+
668
+ elif triton: # NVIDIA Triton Inference Server
669
+ LOGGER.info(f"Using {w} as Triton Inference Server...")
670
+ check_requirements("tritonclient[all]")
671
+ from utils.triton import TritonRemoteModel
672
+
673
+ model = TritonRemoteModel(url=w)
674
+ nhwc = model.runtime.startswith("tensorflow")
675
+ else:
676
+ raise NotImplementedError(f"ERROR: {w} is not a supported format")
677
+
678
+ # class names
679
+ if "names" not in locals():
680
+ names = yaml_load(data)["names"] if data else {i: f"class{i}" for i in range(999)}
681
+ if names[0] == "n01440764" and len(names) == 1000: # ImageNet
682
+ names = yaml_load(ROOT / "data/ImageNet.yaml")["names"] # human-readable names
683
+
684
+ self.__dict__.update(locals()) # assign all variables to self
685
+
686
+ def forward(self, im, augment=False, visualize=False):
687
+ """Performs YOLOv5 inference on input images with options for augmentation and visualization."""
688
+ _b, _ch, h, w = im.shape # batch, channel, height, width
689
+ if self.fp16 and im.dtype != torch.float16:
690
+ im = im.half() # to FP16
691
+ if self.nhwc:
692
+ im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
693
+
694
+ if self.pt: # PyTorch
695
+ y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
696
+ elif self.jit: # TorchScript
697
+ y = self.model(im)
698
+ elif self.dnn: # ONNX OpenCV DNN
699
+ im = im.cpu().numpy() # torch to numpy
700
+ self.net.setInput(im)
701
+ y = self.net.forward()
702
+ elif self.onnx: # ONNX Runtime
703
+ im = im.cpu().numpy() # torch to numpy
704
+ y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
705
+ elif self.xml: # OpenVINO
706
+ im = im.cpu().numpy() # FP32
707
+ y = list(self.ov_compiled_model(im).values())
708
+ elif self.engine: # TensorRT
709
+ if self.dynamic and im.shape != self.bindings["images"].shape:
710
+ i = self.model.get_binding_index("images")
711
+ self.context.set_binding_shape(i, im.shape) # reshape if dynamic
712
+ self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
713
+ for name in self.output_names:
714
+ i = self.model.get_binding_index(name)
715
+ self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
716
+ s = self.bindings["images"].shape
717
+ assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
718
+ self.binding_addrs["images"] = int(im.data_ptr())
719
+ self.context.execute_v2(list(self.binding_addrs.values()))
720
+ y = [self.bindings[x].data for x in sorted(self.output_names)]
721
+ elif self.coreml: # CoreML
722
+ im = im.cpu().numpy()
723
+ im = Image.fromarray((im[0] * 255).astype("uint8"))
724
+ # im = im.resize((192, 320), Image.BILINEAR)
725
+ y = self.model.predict({"image": im}) # coordinates are xywh normalized
726
+ if "confidence" in y:
727
+ box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]]) # xyxy pixels
728
+ conf, cls = y["confidence"].max(1), y["confidence"].argmax(1).astype(np.float)
729
+ y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
730
+ else:
731
+ y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
732
+ elif self.paddle: # PaddlePaddle
733
+ im = im.cpu().numpy().astype(np.float32)
734
+ self.input_handle.copy_from_cpu(im)
735
+ self.predictor.run()
736
+ y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
737
+ elif self.triton: # NVIDIA Triton Inference Server
738
+ y = self.model(im)
739
+ else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
740
+ im = im.cpu().numpy()
741
+ if self.saved_model: # SavedModel
742
+ y = self.model(im, training=False) if self.keras else self.model(im)
743
+ elif self.pb: # GraphDef
744
+ y = self.frozen_func(x=self.tf.constant(im))
745
+ else: # Lite or Edge TPU
746
+ input = self.input_details[0]
747
+ int8 = input["dtype"] == np.uint8 # is TFLite quantized uint8 model
748
+ if int8:
749
+ scale, zero_point = input["quantization"]
750
+ im = (im / scale + zero_point).astype(np.uint8) # de-scale
751
+ self.interpreter.set_tensor(input["index"], im)
752
+ self.interpreter.invoke()
753
+ y = []
754
+ for output in self.output_details:
755
+ x = self.interpreter.get_tensor(output["index"])
756
+ if int8:
757
+ scale, zero_point = output["quantization"]
758
+ x = (x.astype(np.float32) - zero_point) * scale # re-scale
759
+ y.append(x)
760
+ if len(y) == 2 and len(y[1].shape) != 4:
761
+ y = list(reversed(y))
762
+ y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
763
+ y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
764
+
765
+ if isinstance(y, (list, tuple)):
766
+ return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
767
+ else:
768
+ return self.from_numpy(y)
769
+
770
+ def from_numpy(self, x):
771
+ """Converts a NumPy array to a torch tensor, maintaining device compatibility."""
772
+ return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
773
+
774
+ def warmup(self, imgsz=(1, 3, 640, 640)):
775
+ """Performs a single inference warmup to initialize model weights, accepting an `imgsz` tuple for image size."""
776
+ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
777
+ if any(warmup_types) and (self.device.type != "cpu" or self.triton):
778
+ im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
779
+ for _ in range(2 if self.jit else 1): #
780
+ self.forward(im) # warmup
781
+
782
+ @staticmethod
783
+ def _model_type(p="path/to/model.pt"):
784
+ """Determines model type from file path or URL, supporting various export formats.
785
+
786
+ Example: path='path/to/model.onnx' -> type=onnx
787
+ """
788
+ # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
789
+ from export import export_formats
790
+ from utils.downloads import is_url
791
+
792
+ sf = list(export_formats().Suffix) # export suffixes
793
+ if not is_url(p, check=False):
794
+ check_suffix(p, sf) # checks
795
+ url = urlparse(p) # if url may be Triton inference server
796
+ types = [s in Path(p).name for s in sf]
797
+ types[8] &= not types[9] # tflite &= not edgetpu
798
+ triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
799
+ return [*types, triton]
800
+
801
+ @staticmethod
802
+ def _load_metadata(f=Path("path/to/meta.yaml")):
803
+ """Loads metadata from a YAML file, returning strides and names if the file exists, otherwise `None`."""
804
+ if f.exists():
805
+ d = yaml_load(f)
806
+ return d["stride"], d["names"] # assign stride, names
807
+ return None, None
808
+
809
+
810
+ class AutoShape(nn.Module):
811
+ """AutoShape class for robust YOLOv5 inference with preprocessing, NMS, and support for various input formats."""
812
+
813
+ conf = 0.25 # NMS confidence threshold
814
+ iou = 0.45 # NMS IoU threshold
815
+ agnostic = False # NMS class-agnostic
816
+ multi_label = False # NMS multiple labels per box
817
+ classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
818
+ max_det = 1000 # maximum number of detections per image
819
+ amp = False # Automatic Mixed Precision (AMP) inference
820
+
821
+ def __init__(self, model, verbose=True):
822
+ """Initializes YOLOv5 model for inference, setting up attributes and preparing model for evaluation."""
823
+ super().__init__()
824
+ if verbose:
825
+ LOGGER.info("Adding AutoShape... ")
826
+ copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=()) # copy attributes
827
+ self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
828
+ self.pt = not self.dmb or model.pt # PyTorch model
829
+ self.model = model.eval()
830
+ if self.pt:
831
+ m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
832
+ m.inplace = False # Detect.inplace=False for safe multithread inference
833
+ m.export = True # do not output loss values
834
+
835
+ def _apply(self, fn):
836
+ """Applies to(), cpu(), cuda(), half() etc.
837
+
838
+ to model tensors excluding parameters or registered buffers.
839
+ """
840
+ self = super()._apply(fn)
841
+ if self.pt:
842
+ m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
843
+ m.stride = fn(m.stride)
844
+ m.grid = list(map(fn, m.grid))
845
+ if isinstance(m.anchor_grid, list):
846
+ m.anchor_grid = list(map(fn, m.anchor_grid))
847
+ return self
848
+
849
+ @smart_inference_mode()
850
+ def forward(self, ims, size=640, augment=False, profile=False):
851
+ """Performs inference on inputs with optional augment & profiling.
852
+
853
+ Supports various formats including file, URI, OpenCV, PIL, numpy, torch.
854
+ """
855
+ # For size(height=640, width=1280), RGB images example inputs are:
856
+ # file: ims = 'data/images/zidane.jpg' # str or PosixPath
857
+ # URI: = 'https://ultralytics.com/images/zidane.jpg'
858
+ # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
859
+ # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
860
+ # numpy: = np.zeros((640,1280,3)) # HWC
861
+ # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
862
+ # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
863
+
864
+ dt = (Profile(), Profile(), Profile())
865
+ with dt[0]:
866
+ if isinstance(size, int): # expand
867
+ size = (size, size)
868
+ p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param
869
+ autocast = self.amp and (p.device.type != "cpu") # Automatic Mixed Precision (AMP) inference
870
+ if isinstance(ims, torch.Tensor): # torch
871
+ with amp.autocast(autocast):
872
+ return self.model(ims.to(p.device).type_as(p), augment=augment) # inference
873
+
874
+ # Pre-process
875
+ n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images
876
+ shape0, shape1, files = [], [], [] # image and inference shapes, filenames
877
+ for i, im in enumerate(ims):
878
+ f = f"image{i}" # filename
879
+ if isinstance(im, (str, Path)): # filename or uri
880
+ im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im), im
881
+ im = np.asarray(exif_transpose(im))
882
+ elif isinstance(im, Image.Image): # PIL Image
883
+ im, f = np.asarray(exif_transpose(im)), getattr(im, "filename", f) or f
884
+ files.append(Path(f).with_suffix(".jpg").name)
885
+ if im.shape[0] < 5: # image in CHW
886
+ im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
887
+ im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input
888
+ s = im.shape[:2] # HWC
889
+ shape0.append(s) # image shape
890
+ g = max(size) / max(s) # gain
891
+ shape1.append([int(y * g) for y in s])
892
+ ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
893
+ shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape
894
+ x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad
895
+ x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
896
+ x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
897
+
898
+ with amp.autocast(autocast):
899
+ # Inference
900
+ with dt[1]:
901
+ y = self.model(x, augment=augment) # forward
902
+
903
+ # Post-process
904
+ with dt[2]:
905
+ y = non_max_suppression(
906
+ y if self.dmb else y[0],
907
+ self.conf,
908
+ self.iou,
909
+ self.classes,
910
+ self.agnostic,
911
+ self.multi_label,
912
+ max_det=self.max_det,
913
+ ) # NMS
914
+ for i in range(n):
915
+ scale_boxes(shape1, y[i][:, :4], shape0[i])
916
+
917
+ return Detections(ims, y, files, dt, self.names, x.shape)
918
+
919
+
920
+ class Detections:
921
+ """Manages YOLOv5 detection results with methods for visualization, saving, cropping, and exporting detections."""
922
+
923
+ def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
924
+ """Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization."""
925
+ super().__init__()
926
+ d = pred[0].device # device
927
+ gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
928
+ self.ims = ims # list of images as numpy arrays
929
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
930
+ self.names = names # class names
931
+ self.files = files # image filenames
932
+ self.times = times # profiling times
933
+ self.xyxy = pred # xyxy pixels
934
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
935
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
936
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
937
+ self.n = len(self.pred) # number of images (batch size)
938
+ self.t = tuple(x.t / self.n * 1e3 for x in times) # timestamps (ms)
939
+ self.s = tuple(shape) # inference BCHW shape
940
+
941
+ def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
942
+ """Executes model predictions, displaying and/or saving outputs with optional crops and labels."""
943
+ s, crops = "", []
944
+ for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
945
+ s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} " # string
946
+ if pred.shape[0]:
947
+ for c in pred[:, -1].unique():
948
+ n = (pred[:, -1] == c).sum() # detections per class
949
+ s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
950
+ s = s.rstrip(", ")
951
+ if show or save or render or crop:
952
+ annotator = Annotator(im, example=str(self.names))
953
+ for *box, conf, cls in reversed(pred): # xyxy, confidence, class
954
+ label = f"{self.names[int(cls)]} {conf:.2f}"
955
+ if crop:
956
+ file = save_dir / "crops" / self.names[int(cls)] / self.files[i] if save else None
957
+ crops.append(
958
+ {
959
+ "box": box,
960
+ "conf": conf,
961
+ "cls": cls,
962
+ "label": label,
963
+ "im": save_one_box(box, im, file=file, save=save),
964
+ }
965
+ )
966
+ else: # all others
967
+ annotator.box_label(box, label if labels else "", color=colors(cls))
968
+ im = annotator.im
969
+ else:
970
+ s += "(no detections)"
971
+
972
+ im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
973
+ if show:
974
+ if is_jupyter():
975
+ from IPython.display import display
976
+
977
+ display(im)
978
+ else:
979
+ im.show(self.files[i])
980
+ if save:
981
+ f = self.files[i]
982
+ im.save(save_dir / f) # save
983
+ if i == self.n - 1:
984
+ LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
985
+ if render:
986
+ self.ims[i] = np.asarray(im)
987
+ if pprint:
988
+ s = s.lstrip("\n")
989
+ return f"{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}" % self.t
990
+ if crop:
991
+ if save:
992
+ LOGGER.info(f"Saved results to {save_dir}\n")
993
+ return crops
994
+
995
+ @TryExcept("Showing images is not supported in this environment")
996
+ def show(self, labels=True):
997
+ """Displays detection results with optional labels.
998
+
999
+ Usage: show(labels=True)
1000
+ """
1001
+ self._run(show=True, labels=labels) # show results
1002
+
1003
+ def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
1004
+ """Saves detection results with optional labels to a specified directory.
1005
+
1006
+ Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
1007
+ """
1008
+ save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
1009
+ self._run(save=True, labels=labels, save_dir=save_dir) # save results
1010
+
1011
+ def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
1012
+ """Crops detection results, optionally saves them to a directory.
1013
+
1014
+ Args: save (bool), save_dir (str), exist_ok (bool).
1015
+ """
1016
+ save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
1017
+ return self._run(crop=True, save=save, save_dir=save_dir) # crop results
1018
+
1019
+ def render(self, labels=True):
1020
+ """Renders detection results with optional labels on images; args: labels (bool) indicating label inclusion."""
1021
+ self._run(render=True, labels=labels) # render results
1022
+ return self.ims
1023
+
1024
+ def pandas(self):
1025
+ """Returns detections as pandas DataFrames for various box formats (xyxy, xyxyn, xywh, xywhn).
1026
+
1027
+ Example: print(results.pandas().xyxy[0]).
1028
+ """
1029
+ new = copy(self) # return copy
1030
+ ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name" # xyxy columns
1031
+ cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name" # xywh columns
1032
+ for k, c in zip(["xyxy", "xyxyn", "xywh", "xywhn"], [ca, ca, cb, cb]):
1033
+ a = [[[*x[:5], int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
1034
+ setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
1035
+ return new
1036
+
1037
+ def tolist(self):
1038
+ """Converts a Detections object into a list of individual detection results for iteration.
1039
+
1040
+ Example: for result in results.tolist():
1041
+ """
1042
+ r = range(self.n) # iterable
1043
+ return [
1044
+ Detections(
1045
+ [self.ims[i]],
1046
+ [self.pred[i]],
1047
+ [self.files[i]],
1048
+ self.times,
1049
+ self.names,
1050
+ self.s,
1051
+ )
1052
+ for i in r
1053
+ ]
1054
+
1055
+ def print(self):
1056
+ """Logs the string representation of the current object's state via the LOGGER."""
1057
+ LOGGER.info(self.__str__())
1058
+
1059
+ def __len__(self):
1060
+ """Returns the number of results stored, overrides the default len(results)."""
1061
+ return self.n
1062
+
1063
+ def __str__(self):
1064
+ """Returns a string representation of the model's results, suitable for printing, overrides default
1065
+ print(results).
1066
+ """
1067
+ return self._run(pprint=True) # print results
1068
+
1069
+ def __repr__(self):
1070
+ """Returns a string representation of the YOLOv5 object, including its class and formatted results."""
1071
+ return f"YOLOv5 {self.__class__} instance\n" + self.__str__()
1072
+
1073
+
1074
+ class Proto(nn.Module):
1075
+ """YOLOv5 mask Proto module for segmentation models, performing convolutions and upsampling on input tensors."""
1076
+
1077
+ def __init__(self, c1, c_=256, c2=32):
1078
+ """Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration."""
1079
+ super().__init__()
1080
+ self.cv1 = Conv(c1, c_, k=3)
1081
+ self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
1082
+ self.cv2 = Conv(c_, c_, k=3)
1083
+ self.cv3 = Conv(c_, c2)
1084
+
1085
+ def forward(self, x):
1086
+ """Performs a forward pass using convolutional layers and upsampling on input tensor `x`."""
1087
+ return self.cv3(self.cv2(self.upsample(self.cv1(x))))
1088
+
1089
+
1090
+ class Classify(nn.Module):
1091
+ """YOLOv5 classification head with convolution, pooling, and dropout layers for channel transformation."""
1092
+
1093
+ def __init__(
1094
+ self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
1095
+ ): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
1096
+ """Initializes YOLOv5 classification head with convolution, pooling, and dropout layers for input to output
1097
+ channel transformation.
1098
+ """
1099
+ super().__init__()
1100
+ c_ = 1280 # efficientnet_b0 size
1101
+ self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
1102
+ self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1)
1103
+ self.drop = nn.Dropout(p=dropout_p, inplace=True)
1104
+ self.linear = nn.Linear(c_, c2) # to x(b,c2)
1105
+
1106
+ def forward(self, x):
1107
+ """Processes input through conv, pool, drop, and linear layers; supports list concatenation input."""
1108
+ if isinstance(x, list):
1109
+ x = torch.cat(x, 1)
1110
+ return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
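The `Detections` helpers above (`show`, `save`, `crop`, `render`, `pandas`, `tolist`) are all thin wrappers around `_run`. A minimal usage sketch, assuming the model is loaded via torch.hub and that a local image file exists (the file name is illustrative):

import torch

# load a pretrained YOLOv5s model (weights are downloaded on first use)
model = torch.hub.load("ultralytics/yolov5", "yolov5s")

results = model("bus.jpg")        # returns a Detections object
results.print()                   # per-image summary via LOGGER
df = results.pandas().xyxy[0]     # image-0 detections as a DataFrame
crops = results.crop(save=False)  # list of dicts: 'box', 'conf', 'cls', 'label', 'im'
rendered = results.render()       # list of np.ndarray images with boxes drawn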
models/experimental.py ADDED
@@ -0,0 +1,130 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """Experimental modules."""
3
+
4
+ import math
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ from ultralytics.utils.patches import torch_load
10
+
11
+ from utils.downloads import attempt_download
12
+
13
+
14
+ class Sum(nn.Module):
15
+ """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070."""
16
+
17
+ def __init__(self, n, weight=False):
18
+ """Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting, supporting 2+
19
+ inputs.
20
+ """
21
+ super().__init__()
22
+ self.weight = weight # apply weights boolean
23
+ self.iter = range(n - 1) # iter object
24
+ if weight:
25
+ self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
26
+
27
+ def forward(self, x):
28
+ """Processes input through a customizable weighted sum of `n` inputs, optionally applying learned weights."""
29
+ y = x[0] # no weight
30
+ if self.weight:
31
+ w = torch.sigmoid(self.w) * 2
32
+ for i in self.iter:
33
+ y = y + x[i + 1] * w[i]
34
+ else:
35
+ for i in self.iter:
36
+ y = y + x[i + 1]
37
+ return y
38
+
39
+
40
+ class MixConv2d(nn.Module):
41
+ """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595."""
42
+
43
+ def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
44
+ """Initializes MixConv2d with mixed depth-wise convolutional layers, taking input and output channels (c1, c2),
45
+ kernel sizes (k), stride (s), and channel distribution strategy (equal_ch).
46
+ """
47
+ super().__init__()
48
+ n = len(k) # number of convolutions
49
+ if equal_ch: # equal c_ per group
50
+ i = torch.linspace(0, n - 1e-6, c2).floor() # c2 indices
51
+ c_ = [(i == g).sum() for g in range(n)] # intermediate channels
52
+ else: # equal weight.numel() per group
53
+ b = [c2] + [0] * n
54
+ a = np.eye(n + 1, n, k=-1)
55
+ a -= np.roll(a, 1, axis=1)
56
+ a *= np.array(k) ** 2
57
+ a[0] = 1
58
+ c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
59
+
60
+ self.m = nn.ModuleList(
61
+ [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]
62
+ )
63
+ self.bn = nn.BatchNorm2d(c2)
64
+ self.act = nn.SiLU()
65
+
66
+ def forward(self, x):
67
+ """Performs forward pass by applying SiLU activation on batch-normalized concatenated convolutional layer
68
+ outputs.
69
+ """
70
+ return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
71
+
72
+
73
+ class Ensemble(nn.ModuleList):
74
+ """Ensemble of models."""
75
+
76
+ def __init__(self):
77
+ """Initializes an ensemble of models to be used for aggregated predictions."""
78
+ super().__init__()
79
+
80
+ def forward(self, x, augment=False, profile=False, visualize=False):
81
+ """Performs forward pass aggregating outputs from an ensemble of models.."""
82
+ y = [module(x, augment, profile, visualize)[0] for module in self]
83
+ # y = torch.stack(y).max(0)[0] # max ensemble
84
+ # y = torch.stack(y).mean(0) # mean ensemble
85
+ y = torch.cat(y, 1) # nms ensemble
86
+ return y, None # inference, train output
87
+
88
+
89
+ def attempt_load(weights, device=None, inplace=True, fuse=True):
90
+ """Loads and fuses an ensemble or single YOLOv5 model from weights, handling device placement and model adjustments.
91
+
92
+ Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
93
+ """
94
+ from models.yolo import Detect, Model
95
+
96
+ model = Ensemble()
97
+ for w in weights if isinstance(weights, list) else [weights]:
98
+ ckpt = torch_load(attempt_download(w), map_location="cpu") # load
99
+ ckpt = (ckpt.get("ema") or ckpt["model"]).to(device).float() # FP32 model
100
+
101
+ # Model compatibility updates
102
+ if not hasattr(ckpt, "stride"):
103
+ ckpt.stride = torch.tensor([32.0])
104
+ if hasattr(ckpt, "names") and isinstance(ckpt.names, (list, tuple)):
105
+ ckpt.names = dict(enumerate(ckpt.names)) # convert to dict
106
+
107
+ model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, "fuse") else ckpt.eval()) # model in eval mode
108
+
109
+ # Module updates
110
+ for m in model.modules():
111
+ t = type(m)
112
+ if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
113
+ m.inplace = inplace
114
+ if t is Detect and not isinstance(m.anchor_grid, list):
115
+ delattr(m, "anchor_grid")
116
+ setattr(m, "anchor_grid", [torch.zeros(1)] * m.nl)
117
+ elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
118
+ m.recompute_scale_factor = None # torch 1.11.0 compatibility
119
+
120
+ # Return model
121
+ if len(model) == 1:
122
+ return model[-1]
123
+
124
+ # Return detection ensemble
125
+ print(f"Ensemble created with {weights}\n")
126
+ for k in "names", "nc", "yaml":
127
+ setattr(model, k, getattr(model[0], k))
128
+ model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
129
+ assert all(model[0].nc == m.nc for m in model), f"Models have different class counts: {[m.nc for m in model]}"
130
+ return model
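For reference, a short sketch of `attempt_load` behavior: a single path returns one fused FP32 model, while a list returns an `Ensemble` whose per-model outputs are concatenated for NMS (the weight paths here are illustrative):

from models.experimental import attempt_load

model = attempt_load("yolov5s.pt", device="cpu")                     # single model, fused
ensemble = attempt_load(["yolov5s.pt", "yolov5m.pt"], device="cpu")  # Ensemble
print(model.stride, model.names[0])  # stride tensor and {index: name} dict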
models/hub/anchors.yaml ADDED
@@ -0,0 +1,57 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Default anchors for COCO data
4
+
5
+ # P5 -------------------------------------------------------------------------------------------------------------------
6
+ # P5-640:
7
+ anchors_p5_640:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # P6 -------------------------------------------------------------------------------------------------------------------
13
+ # P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
14
+ anchors_p6_640:
15
+ - [9, 11, 21, 19, 17, 41] # P3/8
16
+ - [43, 32, 39, 70, 86, 64] # P4/16
17
+ - [65, 131, 134, 130, 120, 265] # P5/32
18
+ - [282, 180, 247, 354, 512, 387] # P6/64
19
+
20
+ # P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
21
+ anchors_p6_1280:
22
+ - [19, 27, 44, 40, 38, 94] # P3/8
23
+ - [96, 68, 86, 152, 180, 137] # P4/16
24
+ - [140, 301, 303, 264, 238, 542] # P5/32
25
+ - [436, 615, 739, 380, 925, 792] # P6/64
26
+
27
+ # P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
28
+ anchors_p6_1920:
29
+ - [28, 41, 67, 59, 57, 141] # P3/8
30
+ - [144, 103, 129, 227, 270, 205] # P4/16
31
+ - [209, 452, 455, 396, 358, 812] # P5/32
32
+ - [653, 922, 1109, 570, 1387, 1187] # P6/64
33
+
34
+ # P7 -------------------------------------------------------------------------------------------------------------------
35
+ # P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
36
+ anchors_p7_640:
37
+ - [11, 11, 13, 30, 29, 20] # P3/8
38
+ - [30, 46, 61, 38, 39, 92] # P4/16
39
+ - [78, 80, 146, 66, 79, 163] # P5/32
40
+ - [149, 150, 321, 143, 157, 303] # P6/64
41
+ - [257, 402, 359, 290, 524, 372] # P7/128
42
+
43
+ # P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
44
+ anchors_p7_1280:
45
+ - [19, 22, 54, 36, 32, 77] # P3/8
46
+ - [70, 83, 138, 71, 75, 173] # P4/16
47
+ - [165, 159, 148, 334, 375, 151] # P5/32
48
+ - [334, 317, 251, 626, 499, 474] # P6/64
49
+ - [750, 326, 534, 814, 1079, 818] # P7/128
50
+
51
+ # P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
52
+ anchors_p7_1920:
53
+ - [29, 34, 81, 55, 47, 115] # P3/8
54
+ - [105, 124, 207, 107, 113, 259] # P4/16
55
+ - [247, 238, 222, 500, 563, 227] # P5/32
56
+ - [501, 476, 376, 939, 749, 711] # P6/64
57
+ - [1126, 489, 801, 1222, 1618, 1227] # P7/128
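Each anchor list above is a flat sequence of (width, height) pairs, three per output layer. A small sketch of regrouping them for inspection (path assumed relative to the repo root):

import yaml

with open("models/hub/anchors.yaml") as f:
    anchors = yaml.safe_load(f)

p3 = anchors["anchors_p6_640"][0]    # [9, 11, 21, 19, 17, 41]
print(list(zip(p3[::2], p3[1::2])))  # [(9, 11), (21, 19), (17, 41)]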
models/hub/yolov3-spp.yaml ADDED
@@ -0,0 +1,52 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [32, 3, 1]], # 0
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Bottleneck, [64]],
19
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
20
+ [-1, 2, Bottleneck, [128]],
21
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
22
+ [-1, 8, Bottleneck, [256]],
23
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
24
+ [-1, 8, Bottleneck, [512]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
26
+ [-1, 4, Bottleneck, [1024]], # 10
27
+ ]
28
+
29
+ # YOLOv3-SPP head
30
+ head: [
31
+ [-1, 1, Bottleneck, [1024, False]],
32
+ [-1, 1, SPP, [512, [5, 9, 13]]],
33
+ [-1, 1, Conv, [1024, 3, 1]],
34
+ [-1, 1, Conv, [512, 1, 1]],
35
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
36
+
37
+ [-2, 1, Conv, [256, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Bottleneck, [512, False]],
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
44
+
45
+ [-2, 1, Conv, [128, 1, 1]],
46
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
47
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
48
+ [-1, 1, Bottleneck, [256, False]],
49
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
50
+
51
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
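All configs below share the `[from, number, module, args]` row format; at build time the model parser in models/yolo.py scales each row's repeat count by `depth_multiple` and its output channels by `width_multiple`. A simplified sketch of that scaling (a paraphrase of parse_model, not a verbatim copy):

import math

def scale_layer(number, c2, depth_multiple, width_multiple, divisor=8):
    """Scale one config row the way parse_model does."""
    n = max(round(number * depth_multiple), 1) if number > 1 else number
    c2 = math.ceil(c2 * width_multiple / divisor) * divisor  # make_divisible
    return n, c2

# yolov5s (depth_multiple=0.33, width_multiple=0.50):
print(scale_layer(9, 512, 0.33, 0.50))  # (3, 256): 9 C3 repeats -> 3, 512 channels -> 256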
models/hub/yolov3-tiny.yaml ADDED
@@ -0,0 +1,42 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 14, 23, 27, 37, 58] # P4/16
9
+ - [81, 82, 135, 169, 344, 319] # P5/32
10
+
11
+ # YOLOv3-tiny backbone
12
+ backbone:
13
+ # [from, number, module, args]
14
+ [
15
+ [-1, 1, Conv, [16, 3, 1]], # 0
16
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
17
+ [-1, 1, Conv, [32, 3, 1]],
18
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
19
+ [-1, 1, Conv, [64, 3, 1]],
20
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
21
+ [-1, 1, Conv, [128, 3, 1]],
22
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
23
+ [-1, 1, Conv, [256, 3, 1]],
24
+ [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
25
+ [-1, 1, Conv, [512, 3, 1]],
26
+ [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
27
+ [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
28
+ ]
29
+
30
+ # YOLOv3-tiny head
31
+ head: [
32
+ [-1, 1, Conv, [1024, 3, 1]],
33
+ [-1, 1, Conv, [256, 1, 1]],
34
+ [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
35
+
36
+ [-2, 1, Conv, [128, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
39
+ [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
40
+
41
+ [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
42
+ ]
models/hub/yolov3.yaml ADDED
@@ -0,0 +1,52 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # darknet53 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [32, 3, 1]], # 0
17
+ [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
18
+ [-1, 1, Bottleneck, [64]],
19
+ [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
20
+ [-1, 2, Bottleneck, [128]],
21
+ [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
22
+ [-1, 8, Bottleneck, [256]],
23
+ [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
24
+ [-1, 8, Bottleneck, [512]],
25
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
26
+ [-1, 4, Bottleneck, [1024]], # 10
27
+ ]
28
+
29
+ # YOLOv3 head
30
+ head: [
31
+ [-1, 1, Bottleneck, [1024, False]],
32
+ [-1, 1, Conv, [512, 1, 1]],
33
+ [-1, 1, Conv, [1024, 3, 1]],
34
+ [-1, 1, Conv, [512, 1, 1]],
35
+ [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
36
+
37
+ [-2, 1, Conv, [256, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39
+ [[-1, 8], 1, Concat, [1]], # cat backbone P4
40
+ [-1, 1, Bottleneck, [512, False]],
41
+ [-1, 1, Bottleneck, [512, False]],
42
+ [-1, 1, Conv, [256, 1, 1]],
43
+ [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
44
+
45
+ [-2, 1, Conv, [128, 1, 1]],
46
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
47
+ [[-1, 6], 1, Concat, [1]], # cat backbone P3
48
+ [-1, 1, Bottleneck, [256, False]],
49
+ [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
50
+
51
+ [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
52
+ ]
models/hub/yolov5-bifpn.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 BiFPN head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/hub/yolov5-fpn.yaml ADDED
@@ -0,0 +1,43 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 FPN head
29
+ head: [
30
+ [-1, 3, C3, [1024, False]], # 10 (P5/32-large)
31
+
32
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
33
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
34
+ [-1, 1, Conv, [512, 1, 1]],
35
+ [-1, 3, C3, [512, False]], # 14 (P4/16-medium)
36
+
37
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
38
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
39
+ [-1, 1, Conv, [256, 1, 1]],
40
+ [-1, 3, C3, [256, False]], # 18 (P3/8-small)
41
+
42
+ [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
43
+ ]
models/hub/yolov5-p2.yaml ADDED
@@ -0,0 +1,55 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [
13
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
14
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
15
+ [-1, 3, C3, [128]],
16
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
17
+ [-1, 6, C3, [256]],
18
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
19
+ [-1, 9, C3, [512]],
20
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
21
+ [-1, 3, C3, [1024]],
22
+ [-1, 1, SPPF, [1024, 5]], # 9
23
+ ]
24
+
25
+ # YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
26
+ head: [
27
+ [-1, 1, Conv, [512, 1, 1]],
28
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
30
+ [-1, 3, C3, [512, False]], # 13
31
+
32
+ [-1, 1, Conv, [256, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
35
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
36
+
37
+ [-1, 1, Conv, [128, 1, 1]],
38
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39
+ [[-1, 2], 1, Concat, [1]], # cat backbone P2
40
+ [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)
41
+
42
+ [-1, 1, Conv, [128, 3, 2]],
43
+ [[-1, 18], 1, Concat, [1]], # cat head P3
44
+ [-1, 3, C3, [256, False]], # 24 (P3/8-small)
45
+
46
+ [-1, 1, Conv, [256, 3, 2]],
47
+ [[-1, 14], 1, Concat, [1]], # cat head P4
48
+ [-1, 3, C3, [512, False]], # 27 (P4/16-medium)
49
+
50
+ [-1, 1, Conv, [512, 3, 2]],
51
+ [[-1, 10], 1, Concat, [1]], # cat head P5
52
+ [-1, 3, C3, [1024, False]], # 30 (P5/32-large)
53
+
54
+ [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
55
+ ]
models/hub/yolov5-p34.yaml ADDED
@@ -0,0 +1,42 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [
13
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
14
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
15
+ [-1, 3, C3, [128]],
16
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
17
+ [-1, 6, C3, [256]],
18
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
19
+ [-1, 9, C3, [512]],
20
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
21
+ [-1, 3, C3, [1024]],
22
+ [-1, 1, SPPF, [1024, 5]], # 9
23
+ ]
24
+
25
+ # YOLOv5 v6.0 head with (P3, P4) outputs
26
+ head: [
27
+ [-1, 1, Conv, [512, 1, 1]],
28
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
30
+ [-1, 3, C3, [512, False]], # 13
31
+
32
+ [-1, 1, Conv, [256, 1, 1]],
33
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
35
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
36
+
37
+ [-1, 1, Conv, [256, 3, 2]],
38
+ [[-1, 14], 1, Concat, [1]], # cat head P4
39
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
40
+
41
+ [[17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4)
42
+ ]
models/hub/yolov5-p6.yaml ADDED
@@ -0,0 +1,57 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [
13
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
14
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
15
+ [-1, 3, C3, [128]],
16
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
17
+ [-1, 6, C3, [256]],
18
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
19
+ [-1, 9, C3, [512]],
20
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
21
+ [-1, 3, C3, [768]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, SPPF, [1024, 5]], # 11
25
+ ]
26
+
27
+ # YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
28
+ head: [
29
+ [-1, 1, Conv, [768, 1, 1]],
30
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
32
+ [-1, 3, C3, [768, False]], # 15
33
+
34
+ [-1, 1, Conv, [512, 1, 1]],
35
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
37
+ [-1, 3, C3, [512, False]], # 19
38
+
39
+ [-1, 1, Conv, [256, 1, 1]],
40
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
41
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
42
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
43
+
44
+ [-1, 1, Conv, [256, 3, 2]],
45
+ [[-1, 20], 1, Concat, [1]], # cat head P4
46
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
47
+
48
+ [-1, 1, Conv, [512, 3, 2]],
49
+ [[-1, 16], 1, Concat, [1]], # cat head P5
50
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
51
+
52
+ [-1, 1, Conv, [768, 3, 2]],
53
+ [[-1, 12], 1, Concat, [1]], # cat head P6
54
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
55
+
56
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
57
+ ]
models/hub/yolov5-p7.yaml ADDED
@@ -0,0 +1,68 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors: 3 # AutoAnchor evolves 3 anchors per P output layer
8
+
9
+ # YOLOv5 v6.0 backbone
10
+ backbone:
11
+ # [from, number, module, args]
12
+ [
13
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
14
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
15
+ [-1, 3, C3, [128]],
16
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
17
+ [-1, 6, C3, [256]],
18
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
19
+ [-1, 9, C3, [512]],
20
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
21
+ [-1, 3, C3, [768]],
22
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
23
+ [-1, 3, C3, [1024]],
24
+ [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
25
+ [-1, 3, C3, [1280]],
26
+ [-1, 1, SPPF, [1280, 5]], # 13
27
+ ]
28
+
29
+ # YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
30
+ head: [
31
+ [-1, 1, Conv, [1024, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
33
+ [[-1, 10], 1, Concat, [1]], # cat backbone P6
34
+ [-1, 3, C3, [1024, False]], # 17
35
+
36
+ [-1, 1, Conv, [768, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
38
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
39
+ [-1, 3, C3, [768, False]], # 21
40
+
41
+ [-1, 1, Conv, [512, 1, 1]],
42
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
43
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
44
+ [-1, 3, C3, [512, False]], # 25
45
+
46
+ [-1, 1, Conv, [256, 1, 1]],
47
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
48
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
49
+ [-1, 3, C3, [256, False]], # 29 (P3/8-small)
50
+
51
+ [-1, 1, Conv, [256, 3, 2]],
52
+ [[-1, 26], 1, Concat, [1]], # cat head P4
53
+ [-1, 3, C3, [512, False]], # 32 (P4/16-medium)
54
+
55
+ [-1, 1, Conv, [512, 3, 2]],
56
+ [[-1, 22], 1, Concat, [1]], # cat head P5
57
+ [-1, 3, C3, [768, False]], # 35 (P5/32-large)
58
+
59
+ [-1, 1, Conv, [768, 3, 2]],
60
+ [[-1, 18], 1, Concat, [1]], # cat head P6
61
+ [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)
62
+
63
+ [-1, 1, Conv, [1024, 3, 2]],
64
+ [[-1, 14], 1, Concat, [1]], # cat head P7
65
+ [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)
66
+
67
+ [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
68
+ ]
models/hub/yolov5-panet.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 PANet head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/hub/yolov5l6.yaml ADDED
@@ -0,0 +1,61 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [19, 27, 44, 40, 38, 94] # P3/8
9
+ - [96, 68, 86, 152, 180, 137] # P4/16
10
+ - [140, 301, 303, 264, 238, 542] # P5/32
11
+ - [436, 615, 739, 380, 925, 792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head: [
33
+ [-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
61
+ ]
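The *6 variants in this directory add a fourth detection output at stride 64 (P6) for very large objects and are intended for 1280-pixel inference. A quick arithmetic check of the resulting grid sizes:

for p, s in zip(("P3", "P4", "P5", "P6"), (8, 16, 32, 64)):
    print(p, 1280 // s)  # 160, 80, 40, 20 cells per side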
models/hub/yolov5m6.yaml ADDED
@@ -0,0 +1,61 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.67 # model depth multiple
6
+ width_multiple: 0.75 # layer channel multiple
7
+ anchors:
8
+ - [19, 27, 44, 40, 38, 94] # P3/8
9
+ - [96, 68, 86, 152, 180, 137] # P4/16
10
+ - [140, 301, 303, 264, 238, 542] # P5/32
11
+ - [436, 615, 739, 380, 925, 792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head: [
33
+ [-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
61
+ ]
models/hub/yolov5n6.yaml ADDED
@@ -0,0 +1,61 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.25 # layer channel multiple
7
+ anchors:
8
+ - [19, 27, 44, 40, 38, 94] # P3/8
9
+ - [96, 68, 86, 152, 180, 137] # P4/16
10
+ - [140, 301, 303, 264, 238, 542] # P5/32
11
+ - [436, 615, 739, 380, 925, 792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head: [
33
+ [-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
61
+ ]
models/hub/yolov5s-LeakyReLU.yaml ADDED
@@ -0,0 +1,50 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
6
+ depth_multiple: 0.33 # model depth multiple
7
+ width_multiple: 0.50 # layer channel multiple
8
+ anchors:
9
+ - [10, 13, 16, 30, 33, 23] # P3/8
10
+ - [30, 61, 62, 45, 59, 119] # P4/16
11
+ - [116, 90, 156, 198, 373, 326] # P5/32
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [1024]],
26
+ [-1, 1, SPPF, [1024, 5]], # 9
27
+ ]
28
+
29
+ # YOLOv5 v6.0 head
30
+ head: [
31
+ [-1, 1, Conv, [512, 1, 1]],
32
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
33
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
34
+ [-1, 3, C3, [512, False]], # 13
35
+
36
+ [-1, 1, Conv, [256, 1, 1]],
37
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
38
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
39
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
40
+
41
+ [-1, 1, Conv, [256, 3, 2]],
42
+ [[-1, 14], 1, Concat, [1]], # cat head P4
43
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
44
+
45
+ [-1, 1, Conv, [512, 3, 2]],
46
+ [[-1, 10], 1, Concat, [1]], # cat head P5
47
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
48
+
49
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
50
+ ]
models/hub/yolov5s-ghost.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3Ghost, [128]],
19
+ [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3Ghost, [256]],
21
+ [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3Ghost, [512]],
23
+ [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3Ghost, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, GhostConv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3Ghost, [512, False]], # 13
34
+
35
+ [-1, 1, GhostConv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, GhostConv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, GhostConv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/hub/yolov5s-transformer.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/hub/yolov5s6.yaml ADDED
@@ -0,0 +1,61 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.50 # layer channel multiple
7
+ anchors:
8
+ - [19, 27, 44, 40, 38, 94] # P3/8
9
+ - [96, 68, 86, 152, 180, 137] # P4/16
10
+ - [140, 301, 303, 264, 238, 542] # P5/32
11
+ - [436, 615, 739, 380, 925, 792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head: [
33
+ [-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
61
+ ]
models/hub/yolov5x6.yaml ADDED
@@ -0,0 +1,61 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.33 # model depth multiple
6
+ width_multiple: 1.25 # layer channel multiple
7
+ anchors:
8
+ - [19, 27, 44, 40, 38, 94] # P3/8
9
+ - [96, 68, 86, 152, 180, 137] # P4/16
10
+ - [140, 301, 303, 264, 238, 542] # P5/32
11
+ - [436, 615, 739, 380, 925, 792] # P6/64
12
+
13
+ # YOLOv5 v6.0 backbone
14
+ backbone:
15
+ # [from, number, module, args]
16
+ [
17
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
18
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
19
+ [-1, 3, C3, [128]],
20
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
21
+ [-1, 6, C3, [256]],
22
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
23
+ [-1, 9, C3, [512]],
24
+ [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
25
+ [-1, 3, C3, [768]],
26
+ [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
27
+ [-1, 3, C3, [1024]],
28
+ [-1, 1, SPPF, [1024, 5]], # 11
29
+ ]
30
+
31
+ # YOLOv5 v6.0 head
32
+ head: [
33
+ [-1, 1, Conv, [768, 1, 1]],
34
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
35
+ [[-1, 8], 1, Concat, [1]], # cat backbone P5
36
+ [-1, 3, C3, [768, False]], # 15
37
+
38
+ [-1, 1, Conv, [512, 1, 1]],
39
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
40
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
41
+ [-1, 3, C3, [512, False]], # 19
42
+
43
+ [-1, 1, Conv, [256, 1, 1]],
44
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
45
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
46
+ [-1, 3, C3, [256, False]], # 23 (P3/8-small)
47
+
48
+ [-1, 1, Conv, [256, 3, 2]],
49
+ [[-1, 20], 1, Concat, [1]], # cat head P4
50
+ [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
51
+
52
+ [-1, 1, Conv, [512, 3, 2]],
53
+ [[-1, 16], 1, Concat, [1]], # cat head P5
54
+ [-1, 3, C3, [768, False]], # 29 (P5/32-large)
55
+
56
+ [-1, 1, Conv, [768, 3, 2]],
57
+ [[-1, 12], 1, Concat, [1]], # cat head P6
58
+ [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
59
+
60
+ [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
61
+ ]
models/segment/yolov5l-seg.yaml ADDED
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
49
+ ]
models/segment/yolov5m-seg.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.67 # model depth multiple
6
+ width_multiple: 0.75 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
49
+ ]
models/segment/yolov5n-seg.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.25 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
49
+ ]
models/segment/yolov5s-seg.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.33 # model depth multiple
6
+ width_multiple: 0.5 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
49
+ ]
models/segment/yolov5x-seg.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.33 # model depth multiple
6
+ width_multiple: 1.25 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Detect(P3, P4, P5)
49
+ ]
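In the segmentation heads above, `Segment` extends `Detect` with two extra args: 32 mask coefficients per detection and 256 proto channels, matching the `Proto(c1, c_=256, c2=32)` module from models/common.py. A hedged sketch of how final masks are assembled from those pieces (tensor sizes are illustrative):

import torch

protos = torch.rand(32, 160, 160)  # Proto output for one image: 32 prototype masks
coeffs = torch.rand(5, 32)         # 32 mask coefficients for 5 detections

# each detection's mask is a coefficient-weighted sum of prototypes, then a sigmoid
masks = (coeffs @ protos.view(32, -1)).sigmoid().view(-1, 160, 160)
print(masks.shape)  # torch.Size([5, 160, 160])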
models/tf.py ADDED
@@ -0,0 +1,775 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """
3
+ TensorFlow, Keras and TFLite versions of YOLOv5
4
+ Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127.
5
+
6
+ Usage:
7
+ $ python models/tf.py --weights yolov5s.pt
8
+
9
+ Export:
10
+ $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
11
+ """
12
+
13
+ import argparse
14
+ import sys
15
+ from copy import deepcopy
16
+ from pathlib import Path
17
+
18
+ FILE = Path(__file__).resolve()
19
+ ROOT = FILE.parents[1] # YOLOv5 root directory
20
+ if str(ROOT) not in sys.path:
21
+ sys.path.append(str(ROOT)) # add ROOT to PATH
22
+ # ROOT = ROOT.relative_to(Path.cwd()) # relative
23
+
24
+ import numpy as np
25
+ import tensorflow as tf
26
+ import torch
27
+ import torch.nn as nn
28
+ from tensorflow import keras
29
+
30
+ from models.common import (
31
+ C3,
32
+ SPP,
33
+ SPPF,
34
+ Bottleneck,
35
+ BottleneckCSP,
36
+ C3x,
37
+ Concat,
38
+ Conv,
39
+ CrossConv,
40
+ DWConv,
41
+ DWConvTranspose2d,
42
+ Focus,
43
+ autopad,
44
+ )
45
+ from models.experimental import MixConv2d, attempt_load
46
+ from models.yolo import Detect, Segment
47
+ from utils.activations import SiLU
48
+ from utils.general import LOGGER, make_divisible, print_args
49
+
50
+
51
+ class TFBN(keras.layers.Layer):
52
+ """TensorFlow BatchNormalization wrapper for initializing with optional pretrained weights."""
53
+
54
+ def __init__(self, w=None):
55
+ """Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
56
+ super().__init__()
57
+ self.bn = keras.layers.BatchNormalization(
58
+ beta_initializer=keras.initializers.Constant(w.bias.numpy()),
59
+ gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
60
+ moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
61
+ moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
62
+ epsilon=w.eps,
63
+ )
64
+
65
+ def call(self, inputs):
66
+ """Applies batch normalization to the inputs."""
67
+ return self.bn(inputs)
68
+
69
+
70
+ class TFPad(keras.layers.Layer):
71
+ """Pads input tensors in spatial dimensions 1 and 2 with specified integer or tuple padding values."""
72
+
73
+ def __init__(self, pad):
74
+ """Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and
75
+ tuple inputs.
76
+
77
+ Inputs are `pad` as an int or a (pad_dim1, pad_dim2) tuple.
78
+ """
79
+ super().__init__()
80
+ if isinstance(pad, int):
81
+ self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
82
+ else: # tuple/list
83
+ self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
84
+
85
+ def call(self, inputs):
86
+ """Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
87
+ return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
88
+
89
+
90
+ class TFConv(keras.layers.Layer):
91
+ """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
92
+
93
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
94
+ """Initializes a standard convolution layer with optional batch normalization and activation; supports only
95
+ group=1.
96
+
97
+ Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
98
+ """
99
+ super().__init__()
100
+ assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
101
+ # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
102
+ # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
103
+ conv = keras.layers.Conv2D(
104
+ filters=c2,
105
+ kernel_size=k,
106
+ strides=s,
107
+ padding="SAME" if s == 1 else "VALID",
108
+ use_bias=not hasattr(w, "bn"),
109
+ kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
110
+ bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
111
+ )
112
+ self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
113
+ self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
114
+ self.act = activations(w.act) if act else tf.identity
115
+
116
+ def call(self, inputs):
117
+ """Applies convolution, batch normalization, and activation function to input tensors."""
118
+ return self.act(self.bn(self.conv(inputs)))
119
+
120
+
121
+ class TFDWConv(keras.layers.Layer):
122
+ """Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow."""
123
+
124
+ def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
125
+ """Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
126
+ models.
127
+
128
+ Input are ch_in, ch_out, weights, kernel, stride, padding, groups.
129
+ """
130
+ super().__init__()
131
+ assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
132
+ conv = keras.layers.DepthwiseConv2D(
133
+ kernel_size=k,
134
+ depth_multiplier=c2 // c1,
135
+ strides=s,
136
+ padding="SAME" if s == 1 else "VALID",
137
+ use_bias=not hasattr(w, "bn"),
138
+ depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
139
+ bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
140
+ )
141
+ self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
142
+ self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
143
+ self.act = activations(w.act) if act else tf.identity
144
+
145
+ def call(self, inputs):
146
+ """Applies convolution, batch normalization, and activation function to input tensors."""
147
+ return self.act(self.bn(self.conv(inputs)))
148
+
149
+
150
+ class TFDWConvTranspose2d(keras.layers.Layer):
151
+ """Implements a depthwise ConvTranspose2D layer for TensorFlow with specific settings."""
152
+
153
+ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
154
+ """Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
155
+
156
+ Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
157
+ """
158
+ super().__init__()
159
+ assert c1 == c2, f"TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels"
160
+ assert k == 4 and p1 == 1, "TFDWConvTranspose2d() only valid for k=4 and p1=1"
161
+ weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
162
+ self.c1 = c1
163
+ self.conv = [
164
+ keras.layers.Conv2DTranspose(
165
+ filters=1,
166
+ kernel_size=k,
167
+ strides=s,
168
+ padding="VALID",
169
+ output_padding=p2,
170
+ use_bias=True,
171
+ kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
172
+ bias_initializer=keras.initializers.Constant(bias[i]),
173
+ )
174
+ for i in range(c1)
175
+ ]
176
+
177
+ def call(self, inputs):
178
+ """Processes input through parallel convolutions and concatenates results, trimming border pixels."""
179
+ return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
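+ # Keras has no depthwise Conv2DTranspose, so the forward pass above runs c1 parallel
+ # single-channel transposed convolutions; the [:, 1:-1, 1:-1] slice realizes the p1=1 padding.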
180
+
181
+
182
+ class TFFocus(keras.layers.Layer):
183
+ """Focuses spatial information into channel space using pixel shuffling and convolution for TensorFlow models."""
184
+
185
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
186
+ """Initializes TFFocus layer to focus width and height information into channel space with custom convolution
187
+ parameters.
188
+
189
+ Inputs are ch_in, ch_out, kernel, stride, padding, groups.
190
+ """
191
+ super().__init__()
192
+ self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
193
+
194
+ def call(self, inputs):
195
+ """Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4.
196
+
197
+ Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
198
+ """
199
+ inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
200
+ return self.conv(tf.concat(inputs, 3))
201
+
202
+
203
+ class TFBottleneck(keras.layers.Layer):
204
+ """Implements a TensorFlow bottleneck layer with optional shortcut connections for efficient feature extraction."""
205
+
206
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
207
+ """Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with
208
+ optional shortcut.
209
+
210
+ Arguments are ch_in, ch_out, shortcut, groups, expansion.
211
+ """
212
+ super().__init__()
213
+ c_ = int(c2 * e) # hidden channels
214
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
215
+ self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
216
+ self.add = shortcut and c1 == c2
217
+
218
+ def call(self, inputs):
219
+ """Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
220
+ result.
221
+ """
222
+ return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
223
+
224
+
225
+ class TFCrossConv(keras.layers.Layer):
226
+ """Implements a cross convolutional layer with optional expansion, grouping, and shortcut for TensorFlow."""
227
+
228
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
229
+ """Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
230
+ super().__init__()
231
+ c_ = int(c2 * e) # hidden channels
232
+ self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
233
+ self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
234
+ self.add = shortcut and c1 == c2
235
+
236
+ def call(self, inputs):
237
+ """Passes input through two convolutions optionally adding the input if channel dimensions match."""
238
+ return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
239
+
240
+
241
+ class TFConv2d(keras.layers.Layer):
242
+ """Implements a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D for specified filters and stride."""
243
+
244
+ def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
245
+ """Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
246
+ sizes and stride.
247
+ """
248
+ super().__init__()
249
+ assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
250
+ self.conv = keras.layers.Conv2D(
251
+ filters=c2,
252
+ kernel_size=k,
253
+ strides=s,
254
+ padding="VALID",
255
+ use_bias=bias,
256
+ kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
257
+ bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
258
+ )
259
+
260
+ def call(self, inputs):
261
+ """Applies a convolution operation to the inputs and returns the result."""
262
+ return self.conv(inputs)
263
+
264
+
265
+ class TFBottleneckCSP(keras.layers.Layer):
266
+ """Implements a CSP bottleneck layer for TensorFlow models to enhance gradient flow and efficiency."""
267
+
268
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
269
+ """Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
270
+ ratio.
271
+
272
+ Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
273
+ """
274
+ super().__init__()
275
+ c_ = int(c2 * e) # hidden channels
276
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
277
+ self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
278
+ self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
279
+ self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
280
+ self.bn = TFBN(w.bn)
281
+ self.act = lambda x: keras.activations.swish(x)
282
+ self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
283
+
284
+ def call(self, inputs):
285
+ """Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
286
+ dimensions.
287
+ """
288
+ y1 = self.cv3(self.m(self.cv1(inputs)))
289
+ y2 = self.cv2(inputs)
290
+ return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
291
+
292
+
293
+ class TFC3(keras.layers.Layer):
294
+ """CSP bottleneck layer with 3 convolutions for TensorFlow, supporting optional shortcuts and group convolutions."""
295
+
296
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
297
+ """Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
298
+
299
+ Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
300
+ """
301
+ super().__init__()
302
+ c_ = int(c2 * e) # hidden channels
303
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
304
+ self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
305
+ self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
306
+ self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
307
+
308
+ def call(self, inputs):
309
+ """Processes input through a sequence of transformations for object detection (YOLOv5).
310
+
311
+ See https://github.com/ultralytics/yolov5.
312
+ """
313
+ return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
314
+
315
+
316
+ class TFC3x(keras.layers.Layer):
317
+ """A TensorFlow layer for enhanced feature extraction using cross-convolutions in object detection models."""
318
+
319
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
320
+ """Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
321
+
322
+ Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
323
+ """
324
+ super().__init__()
325
+ c_ = int(c2 * e) # hidden channels
326
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
327
+ self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
328
+ self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
329
+ self.m = keras.Sequential(
330
+ [TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
331
+ )
332
+
333
+ def call(self, inputs):
334
+ """Processes input through cascaded convolutions and merges features, returning the final tensor output."""
335
+ return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
336
+
337
+
338
+ class TFSPP(keras.layers.Layer):
339
+ """Implements spatial pyramid pooling for YOLOv3-SPP with specific channels and kernel sizes."""
340
+
341
+ def __init__(self, c1, c2, k=(5, 9, 13), w=None):
342
+ """Initializes a YOLOv3-SPP layer with specific input/output channels and kernel sizes for pooling."""
343
+ super().__init__()
344
+ c_ = c1 // 2 # hidden channels
345
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
346
+ self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
347
+ self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding="SAME") for x in k]
348
+
349
+ def call(self, inputs):
350
+ """Processes input through two TFConv layers and concatenates with max-pooled outputs at intermediate stage."""
351
+ x = self.cv1(inputs)
352
+ return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
353
+
354
+
355
+ class TFSPPF(keras.layers.Layer):
356
+ """Implements a fast spatial pyramid pooling layer for TensorFlow with optimized feature extraction."""
357
+
358
+ def __init__(self, c1, c2, k=5, w=None):
359
+ """Initialize a fast spatial pyramid pooling layer with customizable channels, kernel size, and weights."""
360
+ super().__init__()
361
+ c_ = c1 // 2 # hidden channels
362
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
363
+ self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
364
+ self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
365
+
366
+ def call(self, inputs):
367
+ """Executes the model's forward pass, concatenating input features with three max-pooled versions before final
368
+ convolution.
369
+ """
370
+ x = self.cv1(inputs)
371
+ y1 = self.m(x)
372
+ y2 = self.m(y1)
373
+ return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
374
+
375
+
376
+ class TFDetect(keras.layers.Layer):
377
+ """Implements YOLOv5 object detection layer in TensorFlow for predicting bounding boxes and class probabilities."""
378
+
379
+ def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
380
+ """Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
381
+ size.
382
+ """
383
+ super().__init__()
384
+ self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
385
+ self.nc = nc # number of classes
386
+ self.no = nc + 5 # number of outputs per anchor
387
+ self.nl = len(anchors) # number of detection layers
388
+ self.na = len(anchors[0]) // 2 # number of anchors
389
+ self.grid = [tf.zeros(1)] * self.nl # init grid
390
+ self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
391
+ self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
392
+ self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
393
+ self.training = False # set to False after building model
394
+ self.imgsz = imgsz
395
+ for i in range(self.nl):
396
+ ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
397
+ self.grid[i] = self._make_grid(nx, ny)
398
+
399
+ def call(self, inputs):
400
+ """Performs forward pass through the model layers to predict object bounding boxes and classifications."""
401
+ z = [] # inference output
402
+ x = []
403
+ for i in range(self.nl):
404
+ x.append(self.m[i](inputs[i]))
405
+ # x(bs,20,20,255) to x(bs,3,20,20,85)
406
+ ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
407
+ x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
408
+
409
+ if not self.training: # inference
410
+ y = x[i]
411
+ grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
412
+ anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
413
+ xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy
414
+ wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
415
+ # Normalize xywh to 0-1 to reduce calibration error
416
+ xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
417
+ wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
418
+ y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
419
+ z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
420
+
421
+ return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
422
+
423
+ @staticmethod
424
+ def _make_grid(nx=20, ny=20):
425
+ """Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
426
+ # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
427
+ xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
428
+ return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
429
+
430
+
431
+ class TFSegment(TFDetect):
432
+ """YOLOv5 segmentation head for TensorFlow, combining detection and segmentation."""
433
+
434
+ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
435
+ """Initializes YOLOv5 Segment head with specified channel depths, anchors, and input size for segmentation
436
+ models.
437
+ """
438
+ super().__init__(nc, anchors, ch, imgsz, w)
439
+ self.nm = nm # number of masks
440
+ self.npr = npr # number of protos
441
+ self.no = 5 + nc + self.nm # number of outputs per anchor
442
+ self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv
443
+ self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos
444
+ self.detect = TFDetect.call
445
+
446
+ def call(self, x):
447
+ """Applies detection and proto layers on input, returning detections and optionally protos if training."""
448
+ p = self.proto(x[0])
449
+ # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
450
+ p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
451
+ x = self.detect(self, x)
452
+ return (x, p) if self.training else (x[0], p)
453
+
454
+
455
+ class TFProto(keras.layers.Layer):
456
+ """Implements convolutional and upsampling layers for feature extraction in YOLOv5 segmentation."""
457
+
458
+ def __init__(self, c1, c_=256, c2=32, w=None):
459
+ """Initialize TFProto layer with convolutional and upsampling for feature extraction and transformation."""
460
+ super().__init__()
461
+ self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
462
+ self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
463
+ self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
464
+ self.cv3 = TFConv(c_, c2, w=w.cv3)
465
+
466
+ def call(self, inputs):
467
+ """Performs forward pass through the model, applying convolutions and upscaling on input tensor."""
468
+ return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
469
+
470
+
471
+ class TFUpsample(keras.layers.Layer):
472
+ """Implements a TensorFlow upsampling layer with specified size, scale factor, and interpolation mode."""
473
+
474
+ def __init__(self, size, scale_factor, mode, w=None):
475
+ """Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor
476
+ is even.
477
+
478
+ Warning: all arguments are required, including `w` (accepted for interface consistency but unused).
479
+ """
480
+ super().__init__()
481
+ assert scale_factor % 2 == 0, "scale_factor must be a multiple of 2"
482
+ self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
483
+ # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
484
+ # with default arguments: align_corners=False, half_pixel_centers=False
485
+ # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
486
+ # size=(x.shape[1] * 2, x.shape[2] * 2))
487
+
488
+ def call(self, inputs):
489
+ """Applies upsample operation to inputs using nearest neighbor interpolation."""
490
+ return self.upsample(inputs)
491
+
492
+
493
+ class TFConcat(keras.layers.Layer):
494
+ """Implements TensorFlow's version of torch.concat() for concatenating tensors along the last dimension."""
495
+
496
+ def __init__(self, dimension=1, w=None):
497
+ """Initializes a TensorFlow layer for NCHW to NHWC concatenation, requiring dimension=1."""
498
+ super().__init__()
499
+ assert dimension == 1, "convert only NCHW to NHWC concat"
500
+ self.d = 3
501
+
502
+ def call(self, inputs):
503
+ """Concatenates a list of tensors along the last dimension, used for NCHW to NHWC conversion."""
504
+ return tf.concat(inputs, self.d)
505
+
506
+
507
+ def parse_model(d, ch, model, imgsz):
508
+ """Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments."""
509
+ LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
510
+ anchors, nc, gd, gw, ch_mul = (
511
+ d["anchors"],
512
+ d["nc"],
513
+ d["depth_multiple"],
514
+ d["width_multiple"],
515
+ d.get("channel_multiple"),
516
+ )
517
+ na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
518
+ no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
519
+ if not ch_mul:
520
+ ch_mul = 8
521
+
522
+ layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
523
+ for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
524
+ m_str = m
525
+ m = eval(m) if isinstance(m, str) else m # eval strings
526
+ for j, a in enumerate(args):
527
+ try:
528
+ args[j] = eval(a) if isinstance(a, str) else a # eval strings
529
+ except NameError:
530
+ pass
531
+
532
+ n = max(round(n * gd), 1) if n > 1 else n # depth gain
533
+ if m in [
534
+ nn.Conv2d,
535
+ Conv,
536
+ DWConv,
537
+ DWConvTranspose2d,
538
+ Bottleneck,
539
+ SPP,
540
+ SPPF,
541
+ MixConv2d,
542
+ Focus,
543
+ CrossConv,
544
+ BottleneckCSP,
545
+ C3,
546
+ C3x,
547
+ ]:
548
+ c1, c2 = ch[f], args[0]
549
+ c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2
550
+
551
+ args = [c1, c2, *args[1:]]
552
+ if m in [BottleneckCSP, C3, C3x]:
553
+ args.insert(2, n)
554
+ n = 1
555
+ elif m is nn.BatchNorm2d:
556
+ args = [ch[f]]
557
+ elif m is Concat:
558
+ c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
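+ # unlike models/yolo.py, ch here keeps the input-channel entry at index 0, hence the x + 1 offset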
559
+ elif m in [Detect, Segment]:
560
+ args.append([ch[x + 1] for x in f])
561
+ if isinstance(args[1], int): # number of anchors
562
+ args[1] = [list(range(args[1] * 2))] * len(f)
563
+ if m is Segment:
564
+ args[3] = make_divisible(args[3] * gw, ch_mul)
565
+ args.append(imgsz)
566
+ else:
567
+ c2 = ch[f]
568
+
569
+ tf_m = eval("TF" + m_str.replace("nn.", ""))
570
+ m_ = (
571
+ keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
572
+ if n > 1
573
+ else tf_m(*args, w=model.model[i])
574
+ ) # module
575
+
576
+ torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
577
+ t = str(m)[8:-2].replace("__main__.", "") # module type
578
+ np = sum(x.numel() for x in torch_m_.parameters()) # number params
579
+ m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
580
+ LOGGER.info(f"{i:>3}{f!s:>18}{n!s:>3}{np:>10} {t:<40}{args!s:<30}") # print
581
+ save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
582
+ layers.append(m_)
583
+ ch.append(c2)
584
+ return keras.Sequential(layers), sorted(save)
585
+
586
+
587
+ class TFModel:
588
+ """Implements YOLOv5 model in TensorFlow, supporting TensorFlow, Keras, and TFLite formats for object detection."""
589
+
590
+ def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
591
+ """Initialize TF YOLOv5 model with specified channels, classes, model instance, and input size."""
592
+ super().__init__()
593
+ if isinstance(cfg, dict):
594
+ self.yaml = cfg # model dict
595
+ else: # is *.yaml
596
+ import yaml # for torch hub
597
+
598
+ self.yaml_file = Path(cfg).name
599
+ with open(cfg) as f:
600
+ self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
601
+
602
+ # Define model
603
+ if nc and nc != self.yaml["nc"]:
604
+ LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
605
+ self.yaml["nc"] = nc # override yaml value
606
+ self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
607
+
608
+ def predict(
609
+ self,
610
+ inputs,
611
+ tf_nms=False,
612
+ agnostic_nms=False,
613
+ topk_per_class=100,
614
+ topk_all=100,
615
+ iou_thres=0.45,
616
+ conf_thres=0.25,
617
+ ):
618
+ """Runs inference on input data, with an option for TensorFlow NMS."""
619
+ y = [] # outputs
620
+ x = inputs
621
+ for m in self.model.layers:
622
+ if m.f != -1: # if not from previous layer
623
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
624
+
625
+ x = m(x) # run
626
+ y.append(x if m.i in self.savelist else None) # save output
627
+
628
+ # Add TensorFlow NMS
629
+ if tf_nms:
630
+ boxes = self._xywh2xyxy(x[0][..., :4])
631
+ probs = x[0][:, :, 4:5]
632
+ classes = x[0][:, :, 5:]
633
+ scores = probs * classes
634
+ if agnostic_nms:
635
+ nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
636
+ else:
637
+ boxes = tf.expand_dims(boxes, 2)
638
+ nms = tf.image.combined_non_max_suppression(
639
+ boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
640
+ )
641
+ return (nms,)
642
+ return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
643
+ # x = x[0] # [x(1,6300,85), ...] to x(6300,85)
644
+ # xywh = x[..., :4] # x(6300,4) boxes
645
+ # conf = x[..., 4:5] # x(6300,1) confidences
646
+ # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
647
+ # return tf.concat([conf, cls, xywh], 1)
648
+
649
+ @staticmethod
650
+ def _xywh2xyxy(xywh):
651
+ """Convert box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom-right."""
652
+ x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
653
+ return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
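+ # e.g. center box [50, 45, 20, 10] -> corners [40, 40, 60, 50]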
654
+
655
+
656
+ class AgnosticNMS(keras.layers.Layer):
657
+ """Performs agnostic non-maximum suppression (NMS) on detected objects using IoU and confidence thresholds."""
658
+
659
+ def call(self, input, topk_all, iou_thres, conf_thres):
660
+ """Performs agnostic NMS on input tensors using given thresholds and top-K selection."""
661
+ return tf.map_fn(
662
+ lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
663
+ input,
664
+ fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
665
+ name="agnostic_nms",
666
+ )
667
+
668
+ @staticmethod
669
+ def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
670
+ """Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence
671
+ thresholds.
672
+ """
673
+ boxes, classes, scores = x
674
+ class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
675
+ scores_inp = tf.reduce_max(scores, -1)
676
+ selected_inds = tf.image.non_max_suppression(
677
+ boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
678
+ )
679
+ selected_boxes = tf.gather(boxes, selected_inds)
680
+ padded_boxes = tf.pad(
681
+ selected_boxes,
682
+ paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
683
+ mode="CONSTANT",
684
+ constant_values=0.0,
685
+ )
686
+ selected_scores = tf.gather(scores_inp, selected_inds)
687
+ padded_scores = tf.pad(
688
+ selected_scores,
689
+ paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
690
+ mode="CONSTANT",
691
+ constant_values=-1.0,
692
+ )
693
+ selected_classes = tf.gather(class_inds, selected_inds)
694
+ padded_classes = tf.pad(
695
+ selected_classes,
696
+ paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
697
+ mode="CONSTANT",
698
+ constant_values=-1.0,
699
+ )
700
+ valid_detections = tf.shape(selected_inds)[0]
701
+ return padded_boxes, padded_scores, padded_classes, valid_detections
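+ # Boxes are zero-padded and scores/classes padded with -1 to a fixed topk_all length so
+ # tf.map_fn sees static shapes; valid_detections counts the real entries.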
702
+
703
+
704
+ def activations(act=nn.SiLU):
705
+ """Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish."""
706
+ if isinstance(act, nn.LeakyReLU):
707
+ return lambda x: keras.activations.relu(x, alpha=0.1)
708
+ elif isinstance(act, nn.Hardswish):
709
+ return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
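+ # 0.166666667 above approximates 1/6 in the hard-swish definition x * relu6(x + 3) / 6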
710
+ elif isinstance(act, (nn.SiLU, SiLU)):
711
+ return lambda x: keras.activations.swish(x)
712
+ else:
713
+ raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
714
+
715
+
716
+ def representative_dataset_gen(dataset, ncalib=100):
717
+ """Generate representative dataset for calibration by yielding transformed numpy arrays from the input dataset."""
718
+ for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
719
+ im = np.transpose(img, [1, 2, 0])
720
+ im = np.expand_dims(im, axis=0).astype(np.float32)
721
+ im /= 255
722
+ yield [im]
723
+ if n >= ncalib:
724
+ break
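+ # Usage sketch (assumption: `dataset` is a LoadImages-style iterable of CHW uint8 frames):
+ # converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+ # converter.representative_dataset = lambda: representative_dataset_gen(dataset)
+ # converter.optimizations = [tf.lite.Optimize.DEFAULT]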
725
+
726
+
727
+ def run(
728
+ weights=ROOT / "yolov5s.pt", # weights path
729
+ imgsz=(640, 640), # inference size h,w
730
+ batch_size=1, # batch size
731
+ dynamic=False, # dynamic batch size
732
+ ):
733
+ """Exports YOLOv5 model from PyTorch to TensorFlow and Keras formats, performing inference for validation."""
734
+ # PyTorch model
735
+ im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
736
+ model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
737
+ _ = model(im) # inference
738
+ model.info()
739
+
740
+ # TensorFlow model
741
+ im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
742
+ tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
743
+ _ = tf_model.predict(im) # inference
744
+
745
+ # Keras model
746
+ im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
747
+ keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
748
+ keras_model.summary()
749
+
750
+ LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
751
+
752
+
753
+ def parse_opt():
754
+ """Parses and returns command-line options for model inference, including weights path, image size, batch size, and
755
+ dynamic batching.
756
+ """
757
+ parser = argparse.ArgumentParser()
758
+ parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
759
+ parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
760
+ parser.add_argument("--batch-size", type=int, default=1, help="batch size")
761
+ parser.add_argument("--dynamic", action="store_true", help="dynamic batch size")
762
+ opt = parser.parse_args()
763
+ opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
764
+ print_args(vars(opt))
765
+ return opt
766
+
767
+
768
+ def main(opt):
769
+ """Executes the YOLOv5 model run function with parsed command line options."""
770
+ run(**vars(opt))
771
+
772
+
773
+ if __name__ == "__main__":
774
+ opt = parse_opt()
775
+ main(opt)
models/yolo.py ADDED
@@ -0,0 +1,496 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+ """
3
+ YOLO-specific modules.
4
+
5
+ Usage:
6
+ $ python models/yolo.py --cfg yolov5s.yaml
7
+ """
8
+
9
+ import argparse
10
+ import contextlib
11
+ import math
12
+ import os
13
+ import platform
14
+ import sys
15
+ from copy import deepcopy
16
+ from pathlib import Path
17
+
18
+ import torch
19
+ import torch.nn as nn
20
+
21
+ FILE = Path(__file__).resolve()
22
+ ROOT = FILE.parents[1] # YOLOv5 root directory
23
+ if str(ROOT) not in sys.path:
24
+ sys.path.append(str(ROOT)) # add ROOT to PATH
25
+ if platform.system() != "Windows":
26
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
27
+
28
+ from models.common import (
29
+ C3,
30
+ C3SPP,
31
+ C3TR,
32
+ SPP,
33
+ SPPF,
34
+ Bottleneck,
35
+ BottleneckCSP,
36
+ C3Ghost,
37
+ C3x,
38
+ Classify,
39
+ Concat,
40
+ Contract,
41
+ Conv,
42
+ CrossConv,
43
+ DetectMultiBackend,
44
+ DWConv,
45
+ DWConvTranspose2d,
46
+ Expand,
47
+ Focus,
48
+ GhostBottleneck,
49
+ GhostConv,
50
+ Proto,
51
+ )
52
+ from models.experimental import MixConv2d
53
+ from utils.autoanchor import check_anchor_order
54
+ from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
55
+ from utils.plots import feature_visualization
56
+ from utils.torch_utils import (
57
+ fuse_conv_and_bn,
58
+ initialize_weights,
59
+ model_info,
60
+ profile,
61
+ scale_img,
62
+ select_device,
63
+ time_sync,
64
+ )
65
+
66
+ try:
67
+ import thop # for FLOPs computation
68
+ except ImportError:
69
+ thop = None
70
+
71
+
72
+ class Detect(nn.Module):
73
+ """YOLOv5 Detect head for processing input tensors and generating detection outputs in object detection models."""
74
+
75
+ stride = None # strides computed during build
76
+ dynamic = False # force grid reconstruction
77
+ export = False # export mode
78
+
79
+ def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
80
+ """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
81
+ super().__init__()
82
+ self.nc = nc # number of classes
83
+ self.no = nc + 5 # number of outputs per anchor
84
+ self.nl = len(anchors) # number of detection layers
85
+ self.na = len(anchors[0]) // 2 # number of anchors
86
+ self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
87
+ self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid
88
+ self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
89
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
90
+ self.inplace = inplace # use inplace ops (e.g. slice assignment)
91
+
92
+ def forward(self, x):
93
+ """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
94
+ z = [] # inference output
95
+ for i in range(self.nl):
96
+ x[i] = self.m[i](x[i]) # conv
97
+ bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
98
+ x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
99
+
100
+ if not self.training: # inference
101
+ if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
102
+ self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
103
+
104
+ if isinstance(self, Segment): # (boxes + masks)
105
+ xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
106
+ xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy
107
+ wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh
108
+ y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
109
+ else: # Detect (boxes only)
110
+ xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
111
+ xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
112
+ wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
113
+ y = torch.cat((xy, wh, conf), 4)
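+ # The decode above bounds xy offsets to (-0.5, 1.5) cells via sigmoid*2 and wh to (0, 4)
+ # anchor multiples via (sigmoid*2)**2, matching the -0.5 grid offset added in _make_grid().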
114
+ z.append(y.view(bs, self.na * nx * ny, self.no))
115
+
116
+ return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
117
+
118
+ def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
119
+ """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
120
+ d = self.anchors[i].device
121
+ t = self.anchors[i].dtype
122
+ shape = 1, self.na, ny, nx, 2 # grid shape
123
+ y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
124
+ yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x) # torch<1.10 compatibility
125
+ grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
126
+ anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
127
+ return grid, anchor_grid
128
+
129
+
130
+ class Segment(Detect):
131
+ """YOLOv5 Segment head for segmentation models, extending Detect with mask and prototype layers."""
132
+
133
+ def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
134
+ """Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
135
+ super().__init__(nc, anchors, ch, inplace)
136
+ self.nm = nm # number of masks
137
+ self.npr = npr # number of protos
138
+ self.no = 5 + nc + self.nm # number of outputs per anchor
139
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
140
+ self.proto = Proto(ch[0], self.npr, self.nm) # protos
141
+ self.detect = Detect.forward
142
+
143
+ def forward(self, x):
144
+ """Processes input through the network, returning detections and prototypes; adjusts output based on
145
+ training/export mode.
146
+ """
147
+ p = self.proto(x[0])
148
+ x = self.detect(self, x)
149
+ return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
150
+
151
+
152
+ class BaseModel(nn.Module):
153
+ """YOLOv5 base model."""
154
+
155
+ def forward(self, x, profile=False, visualize=False):
156
+ """Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
157
+ visualization.
158
+ """
159
+ return self._forward_once(x, profile, visualize) # single-scale inference, train
160
+
161
+ def _forward_once(self, x, profile=False, visualize=False):
162
+ """Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
163
+ y, dt = [], [] # outputs
164
+ for m in self.model:
165
+ if m.f != -1: # if not from previous layer
166
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
167
+ if profile:
168
+ self._profile_one_layer(m, x, dt)
169
+ x = m(x) # run
170
+ y.append(x if m.i in self.save else None) # save output
171
+ if visualize:
172
+ feature_visualization(x, m.type, m.i, save_dir=visualize)
173
+ return x
174
+
175
+ def _profile_one_layer(self, m, x, dt):
176
+ """Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
177
+ c = m == self.model[-1] # is final layer, copy input as inplace fix
178
+ o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
179
+ t = time_sync()
180
+ for _ in range(10):
181
+ m(x.copy() if c else x)
182
+ dt.append((time_sync() - t) * 100)
183
+ if m == self.model[0]:
184
+ LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
185
+ LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
186
+ if c:
187
+ LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
188
+
189
+ def fuse(self):
190
+ """Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
191
+ LOGGER.info("Fusing layers... ")
192
+ for m in self.model.modules():
193
+ if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
194
+ m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
195
+ delattr(m, "bn") # remove batchnorm
196
+ m.forward = m.forward_fuse # update forward
197
+ self.info()
198
+ return self
199
+
200
+ def info(self, verbose=False, img_size=640):
201
+ """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
202
+ model_info(self, verbose, img_size)
203
+
204
+ def _apply(self, fn):
205
+ """Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
206
+ buffers.
207
+ """
208
+ self = super()._apply(fn)
209
+ m = self.model[-1] # Detect()
210
+ if isinstance(m, (Detect, Segment)):
211
+ m.stride = fn(m.stride)
212
+ m.grid = list(map(fn, m.grid))
213
+ if isinstance(m.anchor_grid, list):
214
+ m.anchor_grid = list(map(fn, m.anchor_grid))
215
+ return self
216
+
217
+
218
+ class DetectionModel(BaseModel):
219
+ """YOLOv5 detection model class for object detection tasks, supporting custom configurations and anchors."""
220
+
221
+ def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
222
+ """Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
223
+ super().__init__()
224
+ if isinstance(cfg, dict):
225
+ self.yaml = cfg # model dict
226
+ else: # is *.yaml
227
+ import yaml # for torch hub
228
+
229
+ self.yaml_file = Path(cfg).name
230
+ with open(cfg, encoding="ascii", errors="ignore") as f:
231
+ self.yaml = yaml.safe_load(f) # model dict
232
+
233
+ # Define model
234
+ ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
235
+ if nc and nc != self.yaml["nc"]:
236
+ LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
237
+ self.yaml["nc"] = nc # override yaml value
238
+ if anchors:
239
+ LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
240
+ self.yaml["anchors"] = round(anchors) # override yaml value
241
+ self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
242
+ self.names = [str(i) for i in range(self.yaml["nc"])] # default names
243
+ self.inplace = self.yaml.get("inplace", True)
244
+
245
+ # Build strides, anchors
246
+ m = self.model[-1] # Detect()
247
+ if isinstance(m, (Detect, Segment)):
248
+
249
+ def _forward(x):
250
+ """Passes the input 'x' through the model and returns the processed output."""
251
+ return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
252
+
253
+ s = 256 # 2x min stride
254
+ m.inplace = self.inplace
255
+ m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))]) # forward
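+ # e.g. a 256x256 dummy input gives P3-P5 maps of 32/16/8 px, so strides become [8, 16, 32]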
256
+ check_anchor_order(m)
257
+ m.anchors /= m.stride.view(-1, 1, 1)
258
+ self.stride = m.stride
259
+ self._initialize_biases() # only run once
260
+
261
+ # Init weights, biases
262
+ initialize_weights(self)
263
+ self.info()
264
+ LOGGER.info("")
265
+
266
+ def forward(self, x, augment=False, profile=False, visualize=False):
267
+ """Performs single-scale or augmented inference and may include profiling or visualization."""
268
+ if augment:
269
+ return self._forward_augment(x) # augmented inference, None
270
+ return self._forward_once(x, profile, visualize) # single-scale inference, train
271
+
272
+ def _forward_augment(self, x):
273
+ """Performs augmented inference across different scales and flips, returning combined detections."""
274
+ img_size = x.shape[-2:] # height, width
275
+ s = [1, 0.83, 0.67] # scales
276
+ f = [None, 3, None] # flips (2-ud, 3-lr)
277
+ y = [] # outputs
278
+ for si, fi in zip(s, f):
279
+ xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
280
+ yi = self._forward_once(xi)[0] # forward
281
+ # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
282
+ yi = self._descale_pred(yi, fi, si, img_size)
283
+ y.append(yi)
284
+ y = self._clip_augmented(y) # clip augmented tails
285
+ return torch.cat(y, 1), None # augmented inference, train
286
+
287
+ def _descale_pred(self, p, flips, scale, img_size):
288
+ """De-scales predictions from augmented inference, adjusting for flips and image size."""
289
+ if self.inplace:
290
+ p[..., :4] /= scale # de-scale
291
+ if flips == 2:
292
+ p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
293
+ elif flips == 3:
294
+ p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
295
+ else:
296
+ x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
297
+ if flips == 2:
298
+ y = img_size[0] - y # de-flip ud
299
+ elif flips == 3:
300
+ x = img_size[1] - x # de-flip lr
301
+ p = torch.cat((x, y, wh, p[..., 4:]), -1)
302
+ return p
303
+
304
+ def _clip_augmented(self, y):
305
+ """Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
306
+ layer counts.
307
+ """
308
+ nl = self.model[-1].nl # number of detection layers (P3-P5)
309
+ g = sum(4**x for x in range(nl)) # grid points
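+ # e.g. nl=3 detection layers -> g = 4**0 + 4**1 + 4**2 = 21 relative grid points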
310
+ e = 1 # exclude layer count
311
+ i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices
312
+ y[0] = y[0][:, :-i] # large
313
+ i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
314
+ y[-1] = y[-1][:, i:] # small
315
+ return y
316
+
317
+ def _initialize_biases(self, cf=None):
318
+ """Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
319
+
320
+ For details see https://arxiv.org/abs/1708.02002 section 3.3.
321
+ """
322
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
323
+ m = self.model[-1] # Detect() module
324
+ for mi, s in zip(m.m, m.stride): # from
325
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
326
+ b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
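+ # i.e. log(prior): ~8 objects spread over the (640 / s) ** 2 cells of this stride's grid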
327
+ b.data[:, 5 : 5 + m.nc] += (
328
+ math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
329
+ ) # cls
330
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
331
+
332
+
333
+ Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
334
+
335
+
336
+ class SegmentationModel(DetectionModel):
337
+ """YOLOv5 segmentation model for object detection and segmentation tasks with configurable parameters."""
338
+
339
+ def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
340
+ """Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for
341
+ channels, nc (int) for num classes, anchors (list).
342
+ """
343
+ super().__init__(cfg, ch, nc, anchors)
344
+
345
+
346
+ class ClassificationModel(BaseModel):
347
+ """YOLOv5 classification model for image classification tasks, initialized with a config file or detection model."""
348
+
349
+ def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
350
+ """Initializes a YOLOv5 classification model from a config file `cfg` or a detection `model`, with number of
351
+ classes `nc` and `cutoff` index.
352
+ """
353
+ super().__init__()
354
+ self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
355
+
356
+ def _from_detection_model(self, model, nc=1000, cutoff=10):
357
+ """Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
358
+ layer.
359
+ """
360
+ if isinstance(model, DetectMultiBackend):
361
+ model = model.model # unwrap DetectMultiBackend
362
+ model.model = model.model[:cutoff] # backbone
363
+ m = model.model[-1] # last layer
364
+ ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module
365
+ c = Classify(ch, nc) # Classify()
366
+ c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type
367
+ model.model[-1] = c # replace
368
+ self.model = model.model
369
+ self.stride = model.stride
370
+ self.save = []
371
+ self.nc = nc
372
+
373
+ def _from_yaml(self, cfg):
374
+ """Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
375
+ self.model = None
376
+
377
+
378
+ def parse_model(d, ch):
379
+ """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
380
+ LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
381
+ anchors, nc, gd, gw, act, ch_mul = (
382
+ d["anchors"],
383
+ d["nc"],
384
+ d["depth_multiple"],
385
+ d["width_multiple"],
386
+ d.get("activation"),
387
+ d.get("channel_multiple"),
388
+ )
389
+ if act:
390
+ Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU()
391
+ LOGGER.info(f"{colorstr('activation:')} {act}") # print
392
+ if not ch_mul:
393
+ ch_mul = 8
394
+ na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
395
+ no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
396
+
397
+ layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
398
+ for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
399
+ m = eval(m) if isinstance(m, str) else m # eval strings
400
+ for j, a in enumerate(args):
401
+ with contextlib.suppress(NameError):
402
+ args[j] = eval(a) if isinstance(a, str) else a # eval strings
403
+
404
+ n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
405
+ if m in {
406
+ Conv,
407
+ GhostConv,
408
+ Bottleneck,
409
+ GhostBottleneck,
410
+ SPP,
411
+ SPPF,
412
+ DWConv,
413
+ MixConv2d,
414
+ Focus,
415
+ CrossConv,
416
+ BottleneckCSP,
417
+ C3,
418
+ C3TR,
419
+ C3SPP,
420
+ C3Ghost,
421
+ nn.ConvTranspose2d,
422
+ DWConvTranspose2d,
423
+ C3x,
424
+ }:
425
+ c1, c2 = ch[f], args[0]
426
+ if c2 != no: # if not output
427
+ c2 = make_divisible(c2 * gw, ch_mul)
428
+
429
+ args = [c1, c2, *args[1:]]
430
+ if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
431
+ args.insert(2, n) # number of repeats
432
+ n = 1
433
+ elif m is nn.BatchNorm2d:
434
+ args = [ch[f]]
435
+ elif m is Concat:
436
+ c2 = sum(ch[x] for x in f)
437
+ # TODO: channel, gw, gd
438
+ elif m in {Detect, Segment}:
439
+ args.append([ch[x] for x in f])
440
+ if isinstance(args[1], int): # number of anchors
441
+ args[1] = [list(range(args[1] * 2))] * len(f)
442
+ if m is Segment:
443
+ args[3] = make_divisible(args[3] * gw, ch_mul)
444
+ elif m is Contract:
445
+ c2 = ch[f] * args[0] ** 2
446
+ elif m is Expand:
447
+ c2 = ch[f] // args[0] ** 2
448
+ else:
449
+ c2 = ch[f]
450
+
451
+ m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
452
+ t = str(m)[8:-2].replace("__main__.", "") # module type
453
+ np = sum(x.numel() for x in m_.parameters()) # number params
454
+ m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
455
+ LOGGER.info(f"{i:>3}{f!s:>18}{n_:>3}{np:10.0f} {t:<40}{args!s:<30}") # print
456
+ save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
457
+ layers.append(m_)
458
+ if i == 0:
459
+ ch = []
460
+ ch.append(c2)
461
+ return nn.Sequential(*layers), sorted(save)
462
+
463
+
464
+ if __name__ == "__main__":
465
+ parser = argparse.ArgumentParser()
466
+ parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
467
+ parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
468
+ parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
469
+ parser.add_argument("--profile", action="store_true", help="profile model speed")
470
+ parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
471
+ parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
472
+ opt = parser.parse_args()
473
+ opt.cfg = check_yaml(opt.cfg) # check YAML
474
+ print_args(vars(opt))
475
+ device = select_device(opt.device)
476
+
477
+ # Create model
478
+ im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
479
+ model = Model(opt.cfg).to(device)
480
+
481
+ # Options
482
+ if opt.line_profile: # profile layer by layer
483
+ model(im, profile=True)
484
+
485
+ elif opt.profile: # profile forward-backward
486
+ results = profile(input=im, ops=[model], n=3)
487
+
488
+ elif opt.test: # test all models
489
+ for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
490
+ try:
491
+ _ = Model(cfg)
492
+ except Exception as e:
493
+ print(f"Error in {cfg}: {e}")
494
+
495
+ else: # report fused model summary
496
+ model.fuse()
models/yolov5l.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 1.0 # model depth multiple
6
+ width_multiple: 1.0 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/yolov5m.yaml ADDED
@@ -0,0 +1,49 @@
1
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
2
+
3
+ # Parameters
4
+ nc: 80 # number of classes
5
+ depth_multiple: 0.67 # model depth multiple
6
+ width_multiple: 0.75 # layer channel multiple
7
+ anchors:
8
+ - [10, 13, 16, 30, 33, 23] # P3/8
9
+ - [30, 61, 62, 45, 59, 119] # P4/16
10
+ - [116, 90, 156, 198, 373, 326] # P5/32
11
+
12
+ # YOLOv5 v6.0 backbone
13
+ backbone:
14
+ # [from, number, module, args]
15
+ [
16
+ [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
17
+ [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
18
+ [-1, 3, C3, [128]],
19
+ [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
20
+ [-1, 6, C3, [256]],
21
+ [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
22
+ [-1, 9, C3, [512]],
23
+ [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
24
+ [-1, 3, C3, [1024]],
25
+ [-1, 1, SPPF, [1024, 5]], # 9
26
+ ]
27
+
28
+ # YOLOv5 v6.0 head
29
+ head: [
30
+ [-1, 1, Conv, [512, 1, 1]],
31
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32
+ [[-1, 6], 1, Concat, [1]], # cat backbone P4
33
+ [-1, 3, C3, [512, False]], # 13
34
+
35
+ [-1, 1, Conv, [256, 1, 1]],
36
+ [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37
+ [[-1, 4], 1, Concat, [1]], # cat backbone P3
38
+ [-1, 3, C3, [256, False]], # 17 (P3/8-small)
39
+
40
+ [-1, 1, Conv, [256, 3, 2]],
41
+ [[-1, 14], 1, Concat, [1]], # cat head P4
42
+ [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
43
+
44
+ [-1, 1, Conv, [512, 3, 2]],
45
+ [[-1, 10], 1, Concat, [1]], # cat head P5
46
+ [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
47
+
48
+ [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
49
+ ]
models/yolov5n.yaml ADDED
@@ -0,0 +1,49 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Parameters
+ nc: 80 # number of classes
+ depth_multiple: 0.33 # model depth multiple
+ width_multiple: 0.25 # layer channel multiple
+ anchors:
+   - [10, 13, 16, 30, 33, 23] # P3/8
+   - [30, 61, 62, 45, 59, 119] # P4/16
+   - [116, 90, 156, 198, 373, 326] # P5/32
+
+ # YOLOv5 v6.0 backbone
+ backbone:
+   # [from, number, module, args]
+   [
+     [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+     [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+     [-1, 3, C3, [128]],
+     [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+     [-1, 6, C3, [256]],
+     [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+     [-1, 9, C3, [512]],
+     [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+     [-1, 3, C3, [1024]],
+     [-1, 1, SPPF, [1024, 5]], # 9
+   ]
+
+ # YOLOv5 v6.0 head
+ head: [
+     [-1, 1, Conv, [512, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 6], 1, Concat, [1]], # cat backbone P4
+     [-1, 3, C3, [512, False]], # 13
+
+     [-1, 1, Conv, [256, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 4], 1, Concat, [1]], # cat backbone P3
+     [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+     [-1, 1, Conv, [256, 3, 2]],
+     [[-1, 14], 1, Concat, [1]], # cat head P4
+     [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+     [-1, 1, Conv, [512, 3, 2]],
+     [[-1, 10], 1, Concat, [1]], # cat head P5
+     [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+     [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+   ]
models/yolov5s.yaml ADDED
@@ -0,0 +1,49 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Parameters
+ nc: 80 # number of classes
+ depth_multiple: 0.33 # model depth multiple
+ width_multiple: 0.50 # layer channel multiple
+ anchors:
+   - [10, 13, 16, 30, 33, 23] # P3/8
+   - [30, 61, 62, 45, 59, 119] # P4/16
+   - [116, 90, 156, 198, 373, 326] # P5/32
+
+ # YOLOv5 v6.0 backbone
+ backbone:
+   # [from, number, module, args]
+   [
+     [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+     [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+     [-1, 3, C3, [128]],
+     [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+     [-1, 6, C3, [256]],
+     [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+     [-1, 9, C3, [512]],
+     [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+     [-1, 3, C3, [1024]],
+     [-1, 1, SPPF, [1024, 5]], # 9
+   ]
+
+ # YOLOv5 v6.0 head
+ head: [
+     [-1, 1, Conv, [512, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 6], 1, Concat, [1]], # cat backbone P4
+     [-1, 3, C3, [512, False]], # 13
+
+     [-1, 1, Conv, [256, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 4], 1, Concat, [1]], # cat backbone P3
+     [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+     [-1, 1, Conv, [256, 3, 2]],
+     [[-1, 14], 1, Concat, [1]], # cat head P4
+     [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+     [-1, 1, Conv, [512, 3, 2]],
+     [[-1, 10], 1, Concat, [1]], # cat head P5
+     [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+     [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+   ]
models/yolov5x.yaml ADDED
@@ -0,0 +1,49 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Parameters
+ nc: 80 # number of classes
+ depth_multiple: 1.33 # model depth multiple
+ width_multiple: 1.25 # layer channel multiple
+ anchors:
+   - [10, 13, 16, 30, 33, 23] # P3/8
+   - [30, 61, 62, 45, 59, 119] # P4/16
+   - [116, 90, 156, 198, 373, 326] # P5/32
+
+ # YOLOv5 v6.0 backbone
+ backbone:
+   # [from, number, module, args]
+   [
+     [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+     [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+     [-1, 3, C3, [128]],
+     [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+     [-1, 6, C3, [256]],
+     [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+     [-1, 9, C3, [512]],
+     [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+     [-1, 3, C3, [1024]],
+     [-1, 1, SPPF, [1024, 5]], # 9
+   ]
+
+ # YOLOv5 v6.0 head
+ head: [
+     [-1, 1, Conv, [512, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 6], 1, Concat, [1]], # cat backbone P4
+     [-1, 3, C3, [512, False]], # 13
+
+     [-1, 1, Conv, [256, 1, 1]],
+     [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+     [[-1, 4], 1, Concat, [1]], # cat backbone P3
+     [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+     [-1, 1, Conv, [256, 3, 2]],
+     [[-1, 14], 1, Concat, [1]], # cat head P4
+     [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+     [-1, 1, Conv, [512, 3, 2]],
+     [[-1, 10], 1, Concat, [1]], # cat head P5
+     [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+     [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+   ]
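Taken together, the five model configs added in this commit are identical except for the two scaling parameters, collected here for reference:

    # (depth_multiple, width_multiple) for each config in models/
    MULTIPLES = {
        "yolov5n": (0.33, 0.25),
        "yolov5s": (0.33, 0.50),
        "yolov5m": (0.67, 0.75),
        "yolov5l": (1.00, 1.00),
        "yolov5x": (1.33, 1.25),
    }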
requirements.txt ADDED
@@ -0,0 +1,51 @@
+ # YOLOv5 requirements
+ # Usage: pip install -r requirements.txt
+
+ # Base ------------------------------------------------------------------------
+ gitpython>=3.1.30
+ matplotlib>=3.3
+ numpy>=1.23.5
+ opencv-python>=4.1.1
+ pillow>=10.3.0
+ psutil # system resources
+ PyYAML>=5.3.1
+ requests>=2.32.2
+ scipy>=1.4.1
+ thop>=0.1.1 # FLOPs computation
+ torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended)
+ torchvision>=0.9.0
+ tqdm>=4.66.3
+ ultralytics>=8.2.64 # https://ultralytics.com
+ # protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012
+
+ # Logging ---------------------------------------------------------------------
+ # tensorboard>=2.4.1
+ # clearml>=1.2.0
+ # comet
+
+ # Plotting --------------------------------------------------------------------
+ pandas>=1.1.4
+ seaborn>=0.11.0
+
+ # Export ----------------------------------------------------------------------
+ # coremltools>=6.0 # CoreML export
+ # onnx>=1.10.0 # ONNX export
+ # onnx-simplifier>=0.4.1 # ONNX simplifier
+ # nvidia-pyindex # TensorRT export
+ # nvidia-tensorrt # TensorRT export
+ # scikit-learn<=1.1.2 # CoreML quantization
+ # tensorflow>=2.4.0,<=2.13.1 # TF exports (-cpu, -aarch64, -macos)
+ # tensorflowjs>=3.9.0 # TF.js export
+ # openvino-dev>=2023.0 # OpenVINO export
+
+ # Deploy ----------------------------------------------------------------------
+ packaging # Migration of deprecated pkg_resources packages
+ setuptools>=70.0.0 # Snyk vulnerability fix
+ # tritonclient[all]~=2.24.0
+
+ # Extras ----------------------------------------------------------------------
+ # ipython # interactive notebook
+ # mss # screenshots
+ # albumentations>=1.0.3
+ # pycocotools>=2.0.6 # COCO mAP
+ urllib3>=2.5.0 ; python_version > "3.8" # not directly required, pinned by Snyk to avoid a vulnerability
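Only the Base, Plotting, and Deploy groups are uncommented, so a quick runtime check of the key pins can save a debugging round trip. A small sketch using only the standard library (package names taken from the list above):

    from importlib.metadata import PackageNotFoundError, version

    for pkg in ("torch", "torchvision", "ultralytics", "opencv-python"):
        try:
            print(f"{pkg}=={version(pkg)}")
        except PackageNotFoundError:
            print(f"{pkg} is not installed")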
utils/__init__.py ADDED
@@ -0,0 +1,96 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+ """utils/initialization."""
+
+ import contextlib
+ import platform
+ import threading
+
+
+ def emojis(str=""):
+     """Returns an emoji-safe version of a string, stripped of emojis on Windows platforms."""
+     return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str
+
+
+ class TryExcept(contextlib.ContextDecorator):
+     """A context manager and decorator for error handling that prints an optional message with emojis on exception."""
+
+     def __init__(self, msg=""):
+         """Initializes TryExcept with an optional message, used as a decorator or context manager for error handling."""
+         self.msg = msg
+
+     def __enter__(self):
+         """Enter the runtime context related to this object for error handling with an optional message."""
+         pass
+
+     def __exit__(self, exc_type, value, traceback):
+         """Context manager exit method that prints an error message with emojis if an exception occurred, always returns
+         True.
+         """
+         if value:
+             print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
+         return True
+
+
+ def threaded(func):
+     """Decorator @threaded to run a function in a separate thread, returning the thread instance."""
+
+     def wrapper(*args, **kwargs):
+         """Runs the decorated function in a separate daemon thread and returns the thread instance."""
+         thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
+         thread.start()
+         return thread
+
+     return wrapper
+
+
+ def join_threads(verbose=False):
+     """Joins all daemon threads, optionally printing their names if verbose is True.
+
+     Example: atexit.register(lambda: join_threads())
+     """
+     main_thread = threading.current_thread()
+     for t in threading.enumerate():
+         if t is not main_thread:
+             if verbose:
+                 print(f"Joining thread {t.name}")
+             t.join()
+
+
+ def notebook_init(verbose=True):
+     """Initializes notebook environment by checking requirements, cleaning up, and displaying system info."""
+     print("Checking setup...")
+
+     import os
+     import shutil
+
+     from ultralytics.utils.checks import check_requirements
+
+     from utils.general import check_font, is_colab
+     from utils.torch_utils import select_device  # imports
+
+     check_font()
+
+     import psutil
+
+     if check_requirements("wandb", install=False):
+         os.system("pip uninstall -y wandb")  # eliminate unexpected account creation prompt with infinite hang
+     if is_colab():
+         shutil.rmtree("/content/sample_data", ignore_errors=True)  # remove colab /sample_data directory
+
+     # System info
+     display = None
+     if verbose:
+         gb = 1 << 30  # bytes to GiB (1024 ** 3)
+         ram = psutil.virtual_memory().total
+         total, _used, free = shutil.disk_usage("/")
+         with contextlib.suppress(Exception):  # clear display if ipython is installed
+             from IPython import display
+
+             display.clear_output()
+         s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
+     else:
+         s = ""
+
+     select_device(newline=False)
+     print(emojis(f"Setup complete ✅ {s}"))
+     return display
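TryExcept and threaded are the two helpers the rest of utils/ leans on. A short usage sketch (the message string and function bodies are illustrative):

    from utils import TryExcept, threaded

    @TryExcept("Warning ⚠️ demo step failed")
    def may_fail():
        raise ValueError("boom")  # printed, not raised: __exit__ returns True

    @threaded
    def fetch(url):
        print(f"fetching {url} in a daemon thread")

    may_fail()                        # continues past the exception
    t = fetch("https://example.com")  # returns the Thread instance
    t.join()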
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (4.88 kB)
utils/__pycache__/augmentations.cpython-312.pyc ADDED
Binary file (28.2 kB)
utils/__pycache__/autoanchor.cpython-312.pyc ADDED
Binary file (12.3 kB)
utils/__pycache__/dataloaders.cpython-312.pyc ADDED
Binary file (86.7 kB)
utils/__pycache__/downloads.cpython-312.pyc ADDED
Binary file (7.83 kB)
utils/__pycache__/general.cpython-312.pyc ADDED
Binary file (74.2 kB)
utils/__pycache__/metrics.cpython-312.pyc ADDED
Binary file (21.9 kB)