Commit · 7d6df10
Parent(s): 0e1303f
root16285 committed

Add complete FastAPI Docker app (model downloaded at build)

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full changeset.
- .gitignore +1 -0
- Dockerfile +42 -0
- README.md +136 -5
- models/__init__.py +1 -0
- models/__pycache__/__init__.cpython-312.pyc +0 -0
- models/__pycache__/common.cpython-312.pyc +0 -0
- models/__pycache__/experimental.cpython-312.pyc +0 -0
- models/__pycache__/yolo.cpython-312.pyc +0 -0
- models/common.py +1110 -0
- models/experimental.py +130 -0
- models/hub/anchors.yaml +57 -0
- models/hub/yolov3-spp.yaml +52 -0
- models/hub/yolov3-tiny.yaml +42 -0
- models/hub/yolov3.yaml +52 -0
- models/hub/yolov5-bifpn.yaml +49 -0
- models/hub/yolov5-fpn.yaml +43 -0
- models/hub/yolov5-p2.yaml +55 -0
- models/hub/yolov5-p34.yaml +42 -0
- models/hub/yolov5-p6.yaml +57 -0
- models/hub/yolov5-p7.yaml +68 -0
- models/hub/yolov5-panet.yaml +49 -0
- models/hub/yolov5l6.yaml +61 -0
- models/hub/yolov5m6.yaml +61 -0
- models/hub/yolov5n6.yaml +61 -0
- models/hub/yolov5s-LeakyReLU.yaml +50 -0
- models/hub/yolov5s-ghost.yaml +49 -0
- models/hub/yolov5s-transformer.yaml +49 -0
- models/hub/yolov5s6.yaml +61 -0
- models/hub/yolov5x6.yaml +61 -0
- models/segment/yolov5l-seg.yaml +49 -0
- models/segment/yolov5m-seg.yaml +49 -0
- models/segment/yolov5n-seg.yaml +49 -0
- models/segment/yolov5s-seg.yaml +49 -0
- models/segment/yolov5x-seg.yaml +49 -0
- models/tf.py +775 -0
- models/yolo.py +496 -0
- models/yolov5l.yaml +49 -0
- models/yolov5m.yaml +49 -0
- models/yolov5n.yaml +49 -0
- models/yolov5s.yaml +49 -0
- models/yolov5x.yaml +49 -0
- requirements.txt +51 -0
- utils/__init__.py +96 -0
- utils/__pycache__/__init__.cpython-312.pyc +0 -0
- utils/__pycache__/augmentations.cpython-312.pyc +0 -0
- utils/__pycache__/autoanchor.cpython-312.pyc +0 -0
- utils/__pycache__/dataloaders.cpython-312.pyc +0 -0
- utils/__pycache__/downloads.cpython-312.pyc +0 -0
- utils/__pycache__/general.cpython-312.pyc +0 -0
- utils/__pycache__/metrics.cpython-312.pyc +0 -0
.gitignore
ADDED

```
yolov5s.pt
```
Dockerfile
ADDED

```dockerfile
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    wget \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir fastapi uvicorn[standard]==0.32.0 python-multipart websockets

# Copy the entire application
COPY . /app

# Download YOLOv5 model if not present
RUN if [ ! -f yolov5s.pt ]; then \
    wget -q https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt; \
    fi

# Create user for security
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user

# Set environment variables
ENV PATH="/home/user/.local/bin:$PATH"
ENV PORT=7860

# Expose the port
EXPOSE 7860

# Start the FastAPI application
CMD ["sh", "-c", "cd /app/webapp/backend && uvicorn main:app --host 0.0.0.0 --port 7860"]
```
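The wget step above bakes yolov5s.pt into the image at build time (hence the commit message), so the backend can load weights without network access at startup. For context, a minimal sketch of that runtime side, assuming the repo root carries a YOLOv5-style hubconf.py (not visible in this truncated 50-file view) and that webapp/backend/main.py follows the standard local-loading pattern:

```python
import torch

# source="local" makes torch.hub use the vendored models/ and utils/ packages
# in this repo instead of fetching ultralytics/yolov5 from GitHub at runtime.
model = torch.hub.load(".", "custom", path="yolov5s.pt", source="local")
model.conf = 0.25  # NMS confidence threshold (the AutoShape default in models/common.py)

results = model("market.jpg")  # hypothetical sample image; AutoShape also accepts URLs, PIL/numpy inputs
results.print()
```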
README.md
CHANGED

````diff
@@ -1,11 +1,142 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: ZKA Marchés CI - Complete Interface
+emoji: 🚦
+colorFrom: blue
+colorTo: purple
 sdk: docker
+app_port: 7860
 pinned: false
 license: mit
 ---
 
-
+# 🚀 ZKA Marchés CI - Object Detection (Complete Interface)
+
+## 📋 Description
+
+A complete real-time object detection application built on **YOLOv5** for intelligent flow management in Abidjan's markets.
+
+## ✨ Features
+
+### 🎥 Live Webcam
+- Real-time detection over WebSocket
+- Live FPS and latency display
+- Dynamic confidence-threshold adjustment
+- On-the-fly switching between YOLOv5 models (s/m/l)
+
+### 📤 Image Upload
+- Multi-file upload with drag & drop
+- Batch processing
+- Results with colored bounding boxes
+- Detailed per-image statistics
+
+### 📊 Interactive Dashboard
+- Real-time global statistics
+- Interactive charts with Chart.js
+- Most frequently detected objects
+- Model performance
+
+### 💾 History
+- All detections saved
+- Full history browsing
+- Option to clear the history
+
+### 🎨 Modern Interface
+- Responsive design (mobile-friendly)
+- Dark/light mode
+- Smooth animations
+- TailwindCSS + Font Awesome
+
+## 🛠️ Technologies
+
+- **Backend**: FastAPI + WebSocket
+- **Frontend**: HTML5 + vanilla JavaScript + TailwindCSS
+- **AI**: YOLOv5 (Ultralytics)
+- **Visualization**: Chart.js
+- **Deployment**: Docker on Hugging Face Spaces
+
+## 🎯 Usage
+
+### Live Webcam
+1. Click the "Webcam en Direct" (Live Webcam) tab
+2. Click "Démarrer" (Start)
+3. Allow access to your webcam
+4. Detection runs automatically in real time
+
+### Image Upload
+1. Click the "Upload Images" tab
+2. Drag your images into the drop zone or click to select
+3. Click "Analyser les images" (Analyze images)
+4. Review the results with bounding boxes
+
+### Dashboard
+- View global statistics
+- Visualize performance charts
+- Analyze detection trends
+
+### History
+- Find all your previous detections
+- View the details of each detection
+
+## 📊 Detected Classes
+
+The application detects the **80 object classes** of the COCO dataset, including:
+
+**🚶 People & Mobility**
+- People, vehicles (cars, motorcycles, buses, trucks)
+- Bicycles, scooters
+
+**🏪 Infrastructure & Commerce**
+- Street furniture (benches, tables, chairs)
+- Market goods
+- Signage
+
+## 🎓 Academic Context
+
+Project developed at **ESATIC** (École Supérieure Africaine des TIC) for urban management in Africa.
+
+**Target markets:**
+- Adjamé (Abidjan)
+- Treichville (Abidjan)
+- Cocody (Abidjan)
+- Yopougon (Abidjan)
+
+## 🔧 Configuration
+
+The application listens on **port 7860** (required by Hugging Face Spaces).
+
+## 📱 Compatibility
+
+- ✅ Desktop (Chrome, Firefox, Edge, Safari)
+- ✅ Mobile (iOS, Android)
+- ✅ Tablet
+
+## 📝 API Endpoints
+
+- `GET /` - Main interface
+- `POST /detect` - Detection on a single image
+- `POST /detect/batch` - Detection on multiple images
+- `WS /ws` - WebSocket for real-time webcam
+- `GET /statistics` - Global statistics
+- `GET /history` - Detection history
+- `GET /docs` - Interactive API documentation
+
+## 🚀 Local Startup
+
+```bash
+# Install dependencies
+pip install -r requirements.txt
+
+# Launch the server
+cd webapp/backend
+python main.py
+```
+
+Available at: http://localhost:8001
+
+## 📄 License
+
+MIT License
+
+---
+
+**Built with ❤️ for intelligent management of African markets**
````
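For a quick end-to-end check of the HTTP endpoints listed in the README above, a minimal client sketch; the multipart field name "file" and the response shape are assumptions, since webapp/backend/main.py is not part of this view (the authoritative schema is served at `GET /docs`):

```python
import requests

BASE = "http://localhost:7860"  # the Space port; locally per the README it is 8001

# Single-image detection via POST /detect ("file" field name is an assumption).
with open("market.jpg", "rb") as f:  # hypothetical sample image
    resp = requests.post(f"{BASE}/detect", files={"file": ("market.jpg", f, "image/jpeg")})
resp.raise_for_status()
print(resp.json())  # expected: detected classes, confidences, bounding boxes

# Global statistics via GET /statistics.
print(requests.get(f"{BASE}/statistics").json())
```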
models/__init__.py
ADDED

```python
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
```
models/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (171 Bytes).

models/__pycache__/common.cpython-312.pyc
ADDED
Binary file (79.1 kB).

models/__pycache__/experimental.cpython-312.pyc
ADDED
Binary file (8.95 kB).

models/__pycache__/yolo.cpython-312.pyc
ADDED
Binary file (33.3 kB).
models/common.py
ADDED
|
@@ -0,0 +1,1110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
| 2 |
+
"""Common modules."""
|
| 3 |
+
|
| 4 |
+
import ast
|
| 5 |
+
import contextlib
|
| 6 |
+
import json
|
| 7 |
+
import math
|
| 8 |
+
import platform
|
| 9 |
+
import warnings
|
| 10 |
+
import zipfile
|
| 11 |
+
from collections import OrderedDict, namedtuple
|
| 12 |
+
from copy import copy
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from urllib.parse import urlparse
|
| 15 |
+
|
| 16 |
+
import cv2
|
| 17 |
+
import numpy as np
|
| 18 |
+
import pandas as pd
|
| 19 |
+
import requests
|
| 20 |
+
import torch
|
| 21 |
+
import torch.nn as nn
|
| 22 |
+
from PIL import Image
|
| 23 |
+
from torch.cuda import amp
|
| 24 |
+
|
| 25 |
+
# Import 'ultralytics' package or install if missing
|
| 26 |
+
try:
|
| 27 |
+
import ultralytics
|
| 28 |
+
|
| 29 |
+
assert hasattr(ultralytics, "__version__") # verify package is not directory
|
| 30 |
+
except (ImportError, AssertionError):
|
| 31 |
+
import os
|
| 32 |
+
|
| 33 |
+
os.system("pip install -U ultralytics")
|
| 34 |
+
import ultralytics
|
| 35 |
+
|
| 36 |
+
from ultralytics.utils.plotting import Annotator, colors, save_one_box
|
| 37 |
+
|
| 38 |
+
from utils import TryExcept
|
| 39 |
+
from utils.dataloaders import exif_transpose, letterbox
|
| 40 |
+
from utils.general import (
|
| 41 |
+
LOGGER,
|
| 42 |
+
ROOT,
|
| 43 |
+
Profile,
|
| 44 |
+
check_requirements,
|
| 45 |
+
check_suffix,
|
| 46 |
+
check_version,
|
| 47 |
+
colorstr,
|
| 48 |
+
increment_path,
|
| 49 |
+
is_jupyter,
|
| 50 |
+
make_divisible,
|
| 51 |
+
non_max_suppression,
|
| 52 |
+
scale_boxes,
|
| 53 |
+
xywh2xyxy,
|
| 54 |
+
xyxy2xywh,
|
| 55 |
+
yaml_load,
|
| 56 |
+
)
|
| 57 |
+
from utils.torch_utils import copy_attr, smart_inference_mode
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def autopad(k, p=None, d=1):
|
| 61 |
+
"""Pads kernel to 'same' output shape, adjusting for optional dilation; returns padding size.
|
| 62 |
+
|
| 63 |
+
`k`: kernel, `p`: padding, `d`: dilation.
|
| 64 |
+
"""
|
| 65 |
+
if d > 1:
|
| 66 |
+
k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
|
| 67 |
+
if p is None:
|
| 68 |
+
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
|
| 69 |
+
return p
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class Conv(nn.Module):
|
| 73 |
+
"""Applies a convolution, batch normalization, and activation function to an input tensor in a neural network."""
|
| 74 |
+
|
| 75 |
+
default_act = nn.SiLU() # default activation
|
| 76 |
+
|
| 77 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
|
| 78 |
+
"""Initializes a standard convolution layer with optional batch normalization and activation."""
|
| 79 |
+
super().__init__()
|
| 80 |
+
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
|
| 81 |
+
self.bn = nn.BatchNorm2d(c2)
|
| 82 |
+
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
|
| 83 |
+
|
| 84 |
+
def forward(self, x):
|
| 85 |
+
"""Applies a convolution followed by batch normalization and an activation function to the input tensor `x`."""
|
| 86 |
+
return self.act(self.bn(self.conv(x)))
|
| 87 |
+
|
| 88 |
+
def forward_fuse(self, x):
|
| 89 |
+
"""Applies a fused convolution and activation function to the input tensor `x`."""
|
| 90 |
+
return self.act(self.conv(x))
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class DWConv(Conv):
|
| 94 |
+
"""Implements a depth-wise convolution layer with optional activation for efficient spatial filtering."""
|
| 95 |
+
|
| 96 |
+
def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
|
| 97 |
+
"""Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
|
| 98 |
+
channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
|
| 99 |
+
"""
|
| 100 |
+
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class DWConvTranspose2d(nn.ConvTranspose2d):
|
| 104 |
+
"""A depth-wise transpose convolutional layer for upsampling in neural networks, particularly in YOLOv5 models."""
|
| 105 |
+
|
| 106 |
+
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
|
| 107 |
+
"""Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output channels
|
| 108 |
+
(c2), kernel size (k), stride (s), input padding (p1), output padding (p2).
|
| 109 |
+
"""
|
| 110 |
+
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class TransformerLayer(nn.Module):
|
| 114 |
+
"""Transformer layer with multihead attention and linear layers, optimized by removing LayerNorm."""
|
| 115 |
+
|
| 116 |
+
def __init__(self, c, num_heads):
|
| 117 |
+
"""Initializes a transformer layer, sans LayerNorm for performance, with multihead attention and linear layers.
|
| 118 |
+
|
| 119 |
+
See as described in https://arxiv.org/abs/2010.11929.
|
| 120 |
+
"""
|
| 121 |
+
super().__init__()
|
| 122 |
+
self.q = nn.Linear(c, c, bias=False)
|
| 123 |
+
self.k = nn.Linear(c, c, bias=False)
|
| 124 |
+
self.v = nn.Linear(c, c, bias=False)
|
| 125 |
+
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
|
| 126 |
+
self.fc1 = nn.Linear(c, c, bias=False)
|
| 127 |
+
self.fc2 = nn.Linear(c, c, bias=False)
|
| 128 |
+
|
| 129 |
+
def forward(self, x):
|
| 130 |
+
"""Performs forward pass using MultiheadAttention and two linear transformations with residual connections."""
|
| 131 |
+
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
|
| 132 |
+
x = self.fc2(self.fc1(x)) + x
|
| 133 |
+
return x
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
class TransformerBlock(nn.Module):
|
| 137 |
+
"""A Transformer block for vision tasks with convolution, position embeddings, and Transformer layers."""
|
| 138 |
+
|
| 139 |
+
def __init__(self, c1, c2, num_heads, num_layers):
|
| 140 |
+
"""Initializes a Transformer block for vision tasks, adapting dimensions if necessary and stacking specified
|
| 141 |
+
layers.
|
| 142 |
+
"""
|
| 143 |
+
super().__init__()
|
| 144 |
+
self.conv = None
|
| 145 |
+
if c1 != c2:
|
| 146 |
+
self.conv = Conv(c1, c2)
|
| 147 |
+
self.linear = nn.Linear(c2, c2) # learnable position embedding
|
| 148 |
+
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
|
| 149 |
+
self.c2 = c2
|
| 150 |
+
|
| 151 |
+
def forward(self, x):
|
| 152 |
+
"""Processes input through an optional convolution, followed by Transformer layers and position embeddings for
|
| 153 |
+
object detection.
|
| 154 |
+
"""
|
| 155 |
+
if self.conv is not None:
|
| 156 |
+
x = self.conv(x)
|
| 157 |
+
b, _, w, h = x.shape
|
| 158 |
+
p = x.flatten(2).permute(2, 0, 1)
|
| 159 |
+
return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
class Bottleneck(nn.Module):
|
| 163 |
+
"""A bottleneck layer with optional shortcut and group convolution for efficient feature extraction."""
|
| 164 |
+
|
| 165 |
+
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
|
| 166 |
+
"""Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
|
| 167 |
+
expansion.
|
| 168 |
+
"""
|
| 169 |
+
super().__init__()
|
| 170 |
+
c_ = int(c2 * e) # hidden channels
|
| 171 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
| 172 |
+
self.cv2 = Conv(c_, c2, 3, 1, g=g)
|
| 173 |
+
self.add = shortcut and c1 == c2
|
| 174 |
+
|
| 175 |
+
def forward(self, x):
|
| 176 |
+
"""Processes input through two convolutions, optionally adds shortcut if channel dimensions match; input is a
|
| 177 |
+
tensor.
|
| 178 |
+
"""
|
| 179 |
+
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
class BottleneckCSP(nn.Module):
|
| 183 |
+
"""CSP bottleneck layer for feature extraction with cross-stage partial connections and optional shortcuts."""
|
| 184 |
+
|
| 185 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
| 186 |
+
"""Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool,
|
| 187 |
+
groups, expansion.
|
| 188 |
+
"""
|
| 189 |
+
super().__init__()
|
| 190 |
+
c_ = int(c2 * e) # hidden channels
|
| 191 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
| 192 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
| 193 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
| 194 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
| 195 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
| 196 |
+
self.act = nn.SiLU()
|
| 197 |
+
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
| 198 |
+
|
| 199 |
+
def forward(self, x):
|
| 200 |
+
"""Performs forward pass by applying layers, activation, and concatenation on input x, returning feature-
|
| 201 |
+
enhanced output.
|
| 202 |
+
"""
|
| 203 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
| 204 |
+
y2 = self.cv2(x)
|
| 205 |
+
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class CrossConv(nn.Module):
|
| 209 |
+
"""Implements a cross convolution layer with downsampling, expansion, and optional shortcut."""
|
| 210 |
+
|
| 211 |
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
| 212 |
+
"""Initializes CrossConv with downsampling, expanding, and optionally shortcutting; `c1` input, `c2` output
|
| 213 |
+
channels.
|
| 214 |
+
|
| 215 |
+
Inputs are ch_in, ch_out, kernel, stride, groups, expansion, shortcut.
|
| 216 |
+
"""
|
| 217 |
+
super().__init__()
|
| 218 |
+
c_ = int(c2 * e) # hidden channels
|
| 219 |
+
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
| 220 |
+
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
|
| 221 |
+
self.add = shortcut and c1 == c2
|
| 222 |
+
|
| 223 |
+
def forward(self, x):
|
| 224 |
+
"""Performs feature sampling, expanding, and applies shortcut if channels match; expects `x` input tensor."""
|
| 225 |
+
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
class C3(nn.Module):
|
| 229 |
+
"""Implements a CSP Bottleneck module with three convolutions for enhanced feature extraction in neural networks."""
|
| 230 |
+
|
| 231 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
| 232 |
+
"""Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
|
| 233 |
+
convolutions, and expansion.
|
| 234 |
+
"""
|
| 235 |
+
super().__init__()
|
| 236 |
+
c_ = int(c2 * e) # hidden channels
|
| 237 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
| 238 |
+
self.cv2 = Conv(c1, c_, 1, 1)
|
| 239 |
+
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
|
| 240 |
+
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
| 241 |
+
|
| 242 |
+
def forward(self, x):
|
| 243 |
+
"""Performs forward propagation using concatenated outputs from two convolutions and a Bottleneck sequence."""
|
| 244 |
+
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class C3x(C3):
|
| 248 |
+
"""Extends the C3 module with cross-convolutions for enhanced feature extraction in neural networks."""
|
| 249 |
+
|
| 250 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
| 251 |
+
"""Initializes C3x module with cross-convolutions, extending C3 with customizable channel dimensions, groups,
|
| 252 |
+
and expansion.
|
| 253 |
+
"""
|
| 254 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
| 255 |
+
c_ = int(c2 * e)
|
| 256 |
+
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
class C3TR(C3):
|
| 260 |
+
"""C3 module with TransformerBlock for enhanced feature extraction in object detection models."""
|
| 261 |
+
|
| 262 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
| 263 |
+
"""Initializes C3 module with TransformerBlock for enhanced feature extraction, accepts channel sizes, shortcut
|
| 264 |
+
config, group, and expansion.
|
| 265 |
+
"""
|
| 266 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
| 267 |
+
c_ = int(c2 * e)
|
| 268 |
+
self.m = TransformerBlock(c_, c_, 4, n)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
class C3SPP(C3):
|
| 272 |
+
"""Extends the C3 module with an SPP layer for enhanced spatial feature extraction and customizable channels."""
|
| 273 |
+
|
| 274 |
+
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
|
| 275 |
+
"""Initializes a C3 module with SPP layer for advanced spatial feature extraction, given channel sizes, kernel
|
| 276 |
+
sizes, shortcut, group, and expansion ratio.
|
| 277 |
+
"""
|
| 278 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
| 279 |
+
c_ = int(c2 * e)
|
| 280 |
+
self.m = SPP(c_, c_, k)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
class C3Ghost(C3):
|
| 284 |
+
"""Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in YOLOv5."""
|
| 285 |
+
|
| 286 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
| 287 |
+
"""Initializes YOLOv5's C3 module with Ghost Bottlenecks for efficient feature extraction."""
|
| 288 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
| 289 |
+
c_ = int(c2 * e) # hidden channels
|
| 290 |
+
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
class SPP(nn.Module):
|
| 294 |
+
"""Implements Spatial Pyramid Pooling (SPP) for feature extraction, ref: https://arxiv.org/abs/1406.4729."""
|
| 295 |
+
|
| 296 |
+
def __init__(self, c1, c2, k=(5, 9, 13)):
|
| 297 |
+
"""Initializes SPP layer with Spatial Pyramid Pooling, ref: https://arxiv.org/abs/1406.4729, args: c1 (input
|
| 298 |
+
channels), c2 (output channels), k (kernel sizes).
|
| 299 |
+
"""
|
| 300 |
+
super().__init__()
|
| 301 |
+
c_ = c1 // 2 # hidden channels
|
| 302 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
| 303 |
+
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
|
| 304 |
+
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
| 305 |
+
|
| 306 |
+
def forward(self, x):
|
| 307 |
+
"""Applies convolution and max pooling layers to the input tensor `x`, concatenates results, and returns output
|
| 308 |
+
tensor.
|
| 309 |
+
"""
|
| 310 |
+
x = self.cv1(x)
|
| 311 |
+
with warnings.catch_warnings():
|
| 312 |
+
warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
|
| 313 |
+
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
class SPPF(nn.Module):
|
| 317 |
+
"""Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv5 models."""
|
| 318 |
+
|
| 319 |
+
def __init__(self, c1, c2, k=5):
|
| 320 |
+
"""Initializes YOLOv5 SPPF layer with given channels and kernel size for YOLOv5 model, combining convolution and
|
| 321 |
+
max pooling.
|
| 322 |
+
|
| 323 |
+
Equivalent to SPP(k=(5, 9, 13)).
|
| 324 |
+
"""
|
| 325 |
+
super().__init__()
|
| 326 |
+
c_ = c1 // 2 # hidden channels
|
| 327 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
| 328 |
+
self.cv2 = Conv(c_ * 4, c2, 1, 1)
|
| 329 |
+
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
| 330 |
+
|
| 331 |
+
def forward(self, x):
|
| 332 |
+
"""Processes input through a series of convolutions and max pooling operations for feature extraction."""
|
| 333 |
+
x = self.cv1(x)
|
| 334 |
+
with warnings.catch_warnings():
|
| 335 |
+
warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
|
| 336 |
+
y1 = self.m(x)
|
| 337 |
+
y2 = self.m(y1)
|
| 338 |
+
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
class Focus(nn.Module):
|
| 342 |
+
"""Focuses spatial information into channel space using slicing and convolution for efficient feature extraction."""
|
| 343 |
+
|
| 344 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
|
| 345 |
+
"""Initializes Focus module to concentrate width-height info into channel space with configurable convolution
|
| 346 |
+
parameters.
|
| 347 |
+
"""
|
| 348 |
+
super().__init__()
|
| 349 |
+
self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
|
| 350 |
+
# self.contract = Contract(gain=2)
|
| 351 |
+
|
| 352 |
+
def forward(self, x):
|
| 353 |
+
"""Processes input through Focus mechanism, reshaping (b,c,w,h) to (b,4c,w/2,h/2) then applies convolution."""
|
| 354 |
+
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
|
| 355 |
+
# return self.conv(self.contract(x))
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
class GhostConv(nn.Module):
|
| 359 |
+
"""Implements Ghost Convolution for efficient feature extraction, see https://github.com/huawei-noah/ghostnet."""
|
| 360 |
+
|
| 361 |
+
def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
|
| 362 |
+
"""Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels
|
| 363 |
+
for efficiency.
|
| 364 |
+
"""
|
| 365 |
+
super().__init__()
|
| 366 |
+
c_ = c2 // 2 # hidden channels
|
| 367 |
+
self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
|
| 368 |
+
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
|
| 369 |
+
|
| 370 |
+
def forward(self, x):
|
| 371 |
+
"""Performs forward pass, concatenating outputs of two convolutions on input `x`: shape (B,C,H,W)."""
|
| 372 |
+
y = self.cv1(x)
|
| 373 |
+
return torch.cat((y, self.cv2(y)), 1)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
class GhostBottleneck(nn.Module):
|
| 377 |
+
"""Efficient bottleneck layer using Ghost Convolutions, see https://github.com/huawei-noah/ghostnet."""
|
| 378 |
+
|
| 379 |
+
def __init__(self, c1, c2, k=3, s=1):
|
| 380 |
+
"""Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see
|
| 381 |
+
https://github.com/huawei-noah/ghostnet.
|
| 382 |
+
"""
|
| 383 |
+
super().__init__()
|
| 384 |
+
c_ = c2 // 2
|
| 385 |
+
self.conv = nn.Sequential(
|
| 386 |
+
GhostConv(c1, c_, 1, 1), # pw
|
| 387 |
+
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
|
| 388 |
+
GhostConv(c_, c2, 1, 1, act=False),
|
| 389 |
+
) # pw-linear
|
| 390 |
+
self.shortcut = (
|
| 391 |
+
nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
|
| 392 |
+
)
|
| 393 |
+
|
| 394 |
+
def forward(self, x):
|
| 395 |
+
"""Processes input through conv and shortcut layers, returning their summed output."""
|
| 396 |
+
return self.conv(x) + self.shortcut(x)
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
class Contract(nn.Module):
|
| 400 |
+
"""Contracts spatial dimensions into channel dimensions for efficient processing in neural networks."""
|
| 401 |
+
|
| 402 |
+
def __init__(self, gain=2):
|
| 403 |
+
"""Initializes a layer to contract spatial dimensions (width-height) into channels, e.g., input shape
|
| 404 |
+
(1,64,80,80) to (1,256,40,40).
|
| 405 |
+
"""
|
| 406 |
+
super().__init__()
|
| 407 |
+
self.gain = gain
|
| 408 |
+
|
| 409 |
+
def forward(self, x):
|
| 410 |
+
"""Processes input tensor to expand channel dimensions by contracting spatial dimensions, yielding output shape
|
| 411 |
+
`(b, c*s*s, h//s, w//s)`.
|
| 412 |
+
"""
|
| 413 |
+
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
|
| 414 |
+
s = self.gain
|
| 415 |
+
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
|
| 416 |
+
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
|
| 417 |
+
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
class Expand(nn.Module):
|
| 421 |
+
"""Expands spatial dimensions by redistributing channels, e.g., from (1,64,80,80) to (1,16,160,160)."""
|
| 422 |
+
|
| 423 |
+
def __init__(self, gain=2):
|
| 424 |
+
"""Initializes the Expand module to increase spatial dimensions by redistributing channels, with an optional
|
| 425 |
+
gain factor.
|
| 426 |
+
|
| 427 |
+
Example: x(1,64,80,80) to x(1,16,160,160).
|
| 428 |
+
"""
|
| 429 |
+
super().__init__()
|
| 430 |
+
self.gain = gain
|
| 431 |
+
|
| 432 |
+
def forward(self, x):
|
| 433 |
+
"""Processes input tensor x to expand spatial dims by redistributing channels, requiring C / gain^2 == 0."""
|
| 434 |
+
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
|
| 435 |
+
s = self.gain
|
| 436 |
+
x = x.view(b, s, s, c // s**2, h, w) # x(1,2,2,16,80,80)
|
| 437 |
+
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
|
| 438 |
+
return x.view(b, c // s**2, h * s, w * s) # x(1,16,160,160)
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
class Concat(nn.Module):
|
| 442 |
+
"""Concatenates tensors along a specified dimension for efficient tensor manipulation in neural networks."""
|
| 443 |
+
|
| 444 |
+
def __init__(self, dimension=1):
|
| 445 |
+
"""Initializes a Concat module to concatenate tensors along a specified dimension."""
|
| 446 |
+
super().__init__()
|
| 447 |
+
self.d = dimension
|
| 448 |
+
|
| 449 |
+
def forward(self, x):
|
| 450 |
+
"""Concatenates a list of tensors along a specified dims; `x` is a list of tensors, `dimension` is an int."""
|
| 451 |
+
return torch.cat(x, self.d)
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
class DetectMultiBackend(nn.Module):
|
| 455 |
+
"""YOLOv5 MultiBackend class for inference on various backends including PyTorch, ONNX, TensorRT, and more."""
|
| 456 |
+
|
| 457 |
+
def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
|
| 458 |
+
"""Initializes DetectMultiBackend with support for various inference backends, including PyTorch and ONNX."""
|
| 459 |
+
# PyTorch: weights = *.pt
|
| 460 |
+
# TorchScript: *.torchscript
|
| 461 |
+
# ONNX Runtime: *.onnx
|
| 462 |
+
# ONNX OpenCV DNN: *.onnx --dnn
|
| 463 |
+
# OpenVINO: *_openvino_model
|
| 464 |
+
# CoreML: *.mlpackage
|
| 465 |
+
# TensorRT: *.engine
|
| 466 |
+
# TensorFlow SavedModel: *_saved_model
|
| 467 |
+
# TensorFlow GraphDef: *.pb
|
| 468 |
+
# TensorFlow Lite: *.tflite
|
| 469 |
+
# TensorFlow Edge TPU: *_edgetpu.tflite
|
| 470 |
+
# PaddlePaddle: *_paddle_model
|
| 471 |
+
from models.experimental import attempt_download, attempt_load # scoped to avoid circular import
|
| 472 |
+
|
| 473 |
+
super().__init__()
|
| 474 |
+
w = str(weights[0] if isinstance(weights, list) else weights)
|
| 475 |
+
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
|
| 476 |
+
fp16 &= pt or jit or onnx or engine or triton # FP16
|
| 477 |
+
nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH)
|
| 478 |
+
stride = 32 # default stride
|
| 479 |
+
cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA
|
| 480 |
+
if not (pt or triton):
|
| 481 |
+
w = attempt_download(w) # download if not local
|
| 482 |
+
|
| 483 |
+
if pt: # PyTorch
|
| 484 |
+
model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
|
| 485 |
+
stride = max(int(model.stride.max()), 32) # model stride
|
| 486 |
+
names = model.module.names if hasattr(model, "module") else model.names # get class names
|
| 487 |
+
model.half() if fp16 else model.float()
|
| 488 |
+
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
| 489 |
+
elif jit: # TorchScript
|
| 490 |
+
LOGGER.info(f"Loading {w} for TorchScript inference...")
|
| 491 |
+
extra_files = {"config.txt": ""} # model metadata
|
| 492 |
+
model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
|
| 493 |
+
model.half() if fp16 else model.float()
|
| 494 |
+
if extra_files["config.txt"]: # load metadata dict
|
| 495 |
+
d = json.loads(
|
| 496 |
+
extra_files["config.txt"],
|
| 497 |
+
object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()},
|
| 498 |
+
)
|
| 499 |
+
stride, names = int(d["stride"]), d["names"]
|
| 500 |
+
elif dnn: # ONNX OpenCV DNN
|
| 501 |
+
LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
|
| 502 |
+
check_requirements("opencv-python>=4.5.4")
|
| 503 |
+
net = cv2.dnn.readNetFromONNX(w)
|
| 504 |
+
elif onnx: # ONNX Runtime
|
| 505 |
+
LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
|
| 506 |
+
check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
|
| 507 |
+
import onnxruntime
|
| 508 |
+
|
| 509 |
+
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
|
| 510 |
+
session = onnxruntime.InferenceSession(w, providers=providers)
|
| 511 |
+
output_names = [x.name for x in session.get_outputs()]
|
| 512 |
+
meta = session.get_modelmeta().custom_metadata_map # metadata
|
| 513 |
+
if "stride" in meta:
|
| 514 |
+
stride, names = int(meta["stride"]), eval(meta["names"])
|
| 515 |
+
elif xml: # OpenVINO
|
| 516 |
+
LOGGER.info(f"Loading {w} for OpenVINO inference...")
|
| 517 |
+
check_requirements("openvino>=2023.0") # requires openvino-dev: https://pypi.org/project/openvino-dev/
|
| 518 |
+
from openvino.runtime import Core, Layout, get_batch
|
| 519 |
+
|
| 520 |
+
core = Core()
|
| 521 |
+
if not Path(w).is_file(): # if not *.xml
|
| 522 |
+
w = next(Path(w).glob("*.xml")) # get *.xml file from *_openvino_model dir
|
| 523 |
+
ov_model = core.read_model(model=w, weights=Path(w).with_suffix(".bin"))
|
| 524 |
+
if ov_model.get_parameters()[0].get_layout().empty:
|
| 525 |
+
ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
|
| 526 |
+
batch_dim = get_batch(ov_model)
|
| 527 |
+
if batch_dim.is_static:
|
| 528 |
+
batch_size = batch_dim.get_length()
|
| 529 |
+
ov_compiled_model = core.compile_model(ov_model, device_name="AUTO") # AUTO selects best available device
|
| 530 |
+
stride, names = self._load_metadata(Path(w).with_suffix(".yaml")) # load metadata
|
| 531 |
+
elif engine: # TensorRT
|
| 532 |
+
LOGGER.info(f"Loading {w} for TensorRT inference...")
|
| 533 |
+
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
| 534 |
+
|
| 535 |
+
check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0
|
| 536 |
+
if device.type == "cpu":
|
| 537 |
+
device = torch.device("cuda:0")
|
| 538 |
+
Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
|
| 539 |
+
logger = trt.Logger(trt.Logger.INFO)
|
| 540 |
+
with open(w, "rb") as f, trt.Runtime(logger) as runtime:
|
| 541 |
+
model = runtime.deserialize_cuda_engine(f.read())
|
| 542 |
+
context = model.create_execution_context()
|
| 543 |
+
bindings = OrderedDict()
|
| 544 |
+
output_names = []
|
| 545 |
+
fp16 = False # default updated below
|
| 546 |
+
dynamic = False
|
| 547 |
+
is_trt10 = not hasattr(model, "num_bindings")
|
| 548 |
+
num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
|
| 549 |
+
for i in num:
|
| 550 |
+
if is_trt10:
|
| 551 |
+
name = model.get_tensor_name(i)
|
| 552 |
+
dtype = trt.nptype(model.get_tensor_dtype(name))
|
| 553 |
+
is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
|
| 554 |
+
if is_input:
|
| 555 |
+
if -1 in tuple(model.get_tensor_shape(name)): # dynamic
|
| 556 |
+
dynamic = True
|
| 557 |
+
context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2]))
|
| 558 |
+
if dtype == np.float16:
|
| 559 |
+
fp16 = True
|
| 560 |
+
else: # output
|
| 561 |
+
output_names.append(name)
|
| 562 |
+
shape = tuple(context.get_tensor_shape(name))
|
| 563 |
+
else:
|
| 564 |
+
name = model.get_binding_name(i)
|
| 565 |
+
dtype = trt.nptype(model.get_binding_dtype(i))
|
| 566 |
+
if model.binding_is_input(i):
|
| 567 |
+
if -1 in tuple(model.get_binding_shape(i)): # dynamic
|
| 568 |
+
dynamic = True
|
| 569 |
+
context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
|
| 570 |
+
if dtype == np.float16:
|
| 571 |
+
fp16 = True
|
| 572 |
+
else: # output
|
| 573 |
+
output_names.append(name)
|
| 574 |
+
shape = tuple(context.get_binding_shape(i))
|
| 575 |
+
im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
|
| 576 |
+
bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
|
| 577 |
+
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
| 578 |
+
batch_size = bindings["images"].shape[0] # if dynamic, this is instead max batch size
|
| 579 |
+
elif coreml: # CoreML
|
| 580 |
+
LOGGER.info(f"Loading {w} for CoreML inference...")
|
| 581 |
+
import coremltools as ct
|
| 582 |
+
|
| 583 |
+
model = ct.models.MLModel(w)
|
| 584 |
+
elif saved_model: # TF SavedModel
|
| 585 |
+
LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
|
| 586 |
+
import tensorflow as tf
|
| 587 |
+
|
| 588 |
+
keras = False # assume TF1 saved_model
|
| 589 |
+
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
|
| 590 |
+
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
|
| 591 |
+
LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
|
| 592 |
+
import tensorflow as tf
|
| 593 |
+
|
| 594 |
+
def wrap_frozen_graph(gd, inputs, outputs):
|
| 595 |
+
"""Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
|
| 596 |
+
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
|
| 597 |
+
ge = x.graph.as_graph_element
|
| 598 |
+
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
|
| 599 |
+
|
| 600 |
+
def gd_outputs(gd):
|
| 601 |
+
"""Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
|
| 602 |
+
name_list, input_list = [], []
|
| 603 |
+
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
|
| 604 |
+
name_list.append(node.name)
|
| 605 |
+
input_list.extend(node.input)
|
| 606 |
+
return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))
|
| 607 |
+
|
| 608 |
+
gd = tf.Graph().as_graph_def() # TF GraphDef
|
| 609 |
+
with open(w, "rb") as f:
|
| 610 |
+
gd.ParseFromString(f.read())
|
| 611 |
+
frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
|
| 612 |
+
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
|
| 613 |
+
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
|
| 614 |
+
from tflite_runtime.interpreter import Interpreter, load_delegate
|
| 615 |
+
except ImportError:
|
| 616 |
+
import tensorflow as tf
|
| 617 |
+
|
| 618 |
+
Interpreter, load_delegate = (
|
| 619 |
+
tf.lite.Interpreter,
|
| 620 |
+
tf.lite.experimental.load_delegate,
|
| 621 |
+
)
|
| 622 |
+
if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
|
| 623 |
+
LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
|
| 624 |
+
delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
|
| 625 |
+
platform.system()
|
| 626 |
+
]
|
| 627 |
+
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
|
| 628 |
+
else: # TFLite
|
| 629 |
+
LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
|
| 630 |
+
interpreter = Interpreter(model_path=w) # load TFLite model
|
| 631 |
+
interpreter.allocate_tensors() # allocate
|
| 632 |
+
input_details = interpreter.get_input_details() # inputs
|
| 633 |
+
output_details = interpreter.get_output_details() # outputs
|
| 634 |
+
# load metadata
|
| 635 |
+
with contextlib.suppress(zipfile.BadZipFile):
|
| 636 |
+
with zipfile.ZipFile(w, "r") as model:
|
| 637 |
+
meta_file = model.namelist()[0]
|
| 638 |
+
meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
|
| 639 |
+
stride, names = int(meta["stride"]), meta["names"]
|
| 640 |
+
elif tfjs: # TF.js
|
| 641 |
+
raise NotImplementedError("ERROR: YOLOv5 TF.js inference is not supported")
|
| 642 |
+
# PaddlePaddle
|
| 643 |
+
elif paddle:
|
| 644 |
+
LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
|
| 645 |
+
check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle>=3.0.0")
|
| 646 |
+
import paddle.inference as pdi
|
| 647 |
+
|
| 648 |
+
w = Path(w)
|
| 649 |
+
if w.is_dir():
|
| 650 |
+
model_file = next(w.rglob("*.json"), None)
|
| 651 |
+
params_file = next(w.rglob("*.pdiparams"), None)
|
| 652 |
+
elif w.suffix == ".pdiparams":
|
| 653 |
+
model_file = w.with_name("model.json")
|
| 654 |
+
params_file = w
|
| 655 |
+
else:
|
| 656 |
+
raise ValueError(f"Invalid model path {w}. Provide model directory or a .pdiparams file.")
|
| 657 |
+
|
| 658 |
+
if not (model_file and params_file and model_file.is_file() and params_file.is_file()):
|
| 659 |
+
raise FileNotFoundError(f"Model files not found in {w}. Both .json and .pdiparams files are required.")
|
| 660 |
+
|
| 661 |
+
config = pdi.Config(str(model_file), str(params_file))
|
| 662 |
+
if cuda:
|
| 663 |
+
config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
|
| 664 |
+
predictor = pdi.create_predictor(config)
|
| 665 |
+
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
|
| 666 |
+
output_names = predictor.get_output_names()
|
| 667 |
+
|
| 668 |
+
elif triton: # NVIDIA Triton Inference Server
|
| 669 |
+
LOGGER.info(f"Using {w} as Triton Inference Server...")
|
| 670 |
+
check_requirements("tritonclient[all]")
|
| 671 |
+
from utils.triton import TritonRemoteModel
|
| 672 |
+
|
| 673 |
+
model = TritonRemoteModel(url=w)
|
| 674 |
+
nhwc = model.runtime.startswith("tensorflow")
|
| 675 |
+
else:
|
| 676 |
+
raise NotImplementedError(f"ERROR: {w} is not a supported format")
|
| 677 |
+
|
| 678 |
+
# class names
|
| 679 |
+
if "names" not in locals():
|
| 680 |
+
names = yaml_load(data)["names"] if data else {i: f"class{i}" for i in range(999)}
|
| 681 |
+
if names[0] == "n01440764" and len(names) == 1000: # ImageNet
|
| 682 |
+
names = yaml_load(ROOT / "data/ImageNet.yaml")["names"] # human-readable names
|
| 683 |
+
|
| 684 |
+
self.__dict__.update(locals()) # assign all variables to self
|
| 685 |
+
|
| 686 |
+
def forward(self, im, augment=False, visualize=False):
|
| 687 |
+
"""Performs YOLOv5 inference on input images with options for augmentation and visualization."""
|
| 688 |
+
_b, _ch, h, w = im.shape # batch, channel, height, width
|
| 689 |
+
if self.fp16 and im.dtype != torch.float16:
|
| 690 |
+
im = im.half() # to FP16
|
| 691 |
+
if self.nhwc:
|
| 692 |
+
im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3)
|
| 693 |
+
|
| 694 |
+
if self.pt: # PyTorch
|
| 695 |
+
y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
|
| 696 |
+
elif self.jit: # TorchScript
|
| 697 |
+
y = self.model(im)
|
| 698 |
+
elif self.dnn: # ONNX OpenCV DNN
|
| 699 |
+
im = im.cpu().numpy() # torch to numpy
|
| 700 |
+
self.net.setInput(im)
|
| 701 |
+
y = self.net.forward()
|
| 702 |
+
elif self.onnx: # ONNX Runtime
|
| 703 |
+
im = im.cpu().numpy() # torch to numpy
|
| 704 |
+
y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
|
| 705 |
+
elif self.xml: # OpenVINO
|
| 706 |
+
im = im.cpu().numpy() # FP32
|
| 707 |
+
y = list(self.ov_compiled_model(im).values())
|
| 708 |
+
elif self.engine: # TensorRT
|
| 709 |
+
if self.dynamic and im.shape != self.bindings["images"].shape:
|
| 710 |
+
i = self.model.get_binding_index("images")
|
| 711 |
+
self.context.set_binding_shape(i, im.shape) # reshape if dynamic
|
| 712 |
+
self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
|
| 713 |
+
for name in self.output_names:
|
| 714 |
+
i = self.model.get_binding_index(name)
|
| 715 |
+
self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
|
| 716 |
+
s = self.bindings["images"].shape
|
| 717 |
+
assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
|
| 718 |
+
self.binding_addrs["images"] = int(im.data_ptr())
|
| 719 |
+
self.context.execute_v2(list(self.binding_addrs.values()))
|
| 720 |
+
y = [self.bindings[x].data for x in sorted(self.output_names)]
|
| 721 |
+
elif self.coreml: # CoreML
|
| 722 |
+
im = im.cpu().numpy()
|
| 723 |
+
im = Image.fromarray((im[0] * 255).astype("uint8"))
|
| 724 |
+
# im = im.resize((192, 320), Image.BILINEAR)
|
| 725 |
+
y = self.model.predict({"image": im}) # coordinates are xywh normalized
|
| 726 |
+
if "confidence" in y:
|
| 727 |
+
box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]]) # xyxy pixels
|
| 728 |
+
conf, cls = y["confidence"].max(1), y["confidence"].argmax(1).astype(np.float)
|
| 729 |
+
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
|
| 730 |
+
else:
|
| 731 |
+
y = list(reversed(y.values())) # reversed for segmentation models (pred, proto)
|
| 732 |
+
elif self.paddle: # PaddlePaddle
|
| 733 |
+
im = im.cpu().numpy().astype(np.float32)
|
| 734 |
+
self.input_handle.copy_from_cpu(im)
|
| 735 |
+
self.predictor.run()
|
| 736 |
+
y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
|
| 737 |
+
elif self.triton: # NVIDIA Triton Inference Server
|
| 738 |
+
y = self.model(im)
|
| 739 |
+
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
|
| 740 |
+
im = im.cpu().numpy()
|
| 741 |
+
if self.saved_model: # SavedModel
|
| 742 |
+
y = self.model(im, training=False) if self.keras else self.model(im)
|
| 743 |
+
elif self.pb: # GraphDef
|
| 744 |
+
y = self.frozen_func(x=self.tf.constant(im))
|
| 745 |
+
else: # Lite or Edge TPU
|
| 746 |
+
input = self.input_details[0]
|
| 747 |
+
int8 = input["dtype"] == np.uint8 # is TFLite quantized uint8 model
|
| 748 |
+
if int8:
|
| 749 |
+
scale, zero_point = input["quantization"]
|
| 750 |
+
im = (im / scale + zero_point).astype(np.uint8) # de-scale
|
| 751 |
+
self.interpreter.set_tensor(input["index"], im)
|
| 752 |
+
self.interpreter.invoke()
|
| 753 |
+
y = []
|
| 754 |
+
for output in self.output_details:
|
| 755 |
+
x = self.interpreter.get_tensor(output["index"])
|
| 756 |
+
if int8:
|
| 757 |
+
scale, zero_point = output["quantization"]
|
| 758 |
+
x = (x.astype(np.float32) - zero_point) * scale # re-scale
|
| 759 |
+
y.append(x)
|
| 760 |
+
if len(y) == 2 and len(y[1].shape) != 4:
|
| 761 |
+
y = list(reversed(y))
|
| 762 |
+
y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
|
| 763 |
+
y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels
|
| 764 |
+
|
| 765 |
+
if isinstance(y, (list, tuple)):
|
| 766 |
+
return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
|
| 767 |
+
else:
|
| 768 |
+
return self.from_numpy(y)
|
| 769 |
+
|
| 770 |
+
def from_numpy(self, x):
|
| 771 |
+
"""Converts a NumPy array to a torch tensor, maintaining device compatibility."""
|
| 772 |
+
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
|
| 773 |
+
|
| 774 |
+
def warmup(self, imgsz=(1, 3, 640, 640)):
|
| 775 |
+
"""Performs a single inference warmup to initialize model weights, accepting an `imgsz` tuple for image size."""
|
| 776 |
+
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
|
| 777 |
+
if any(warmup_types) and (self.device.type != "cpu" or self.triton):
|
| 778 |
+
im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
|
| 779 |
+
for _ in range(2 if self.jit else 1): #
|
| 780 |
+
self.forward(im) # warmup

    @staticmethod
    def _model_type(p="path/to/model.pt"):
        """Determines model type from file path or URL, supporting various export formats.

        Example: path='path/to/model.onnx' -> type=onnx
        """
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from export import export_formats
        from utils.downloads import is_url

        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return [*types, triton]

    @staticmethod
    def _load_metadata(f=Path("path/to/meta.yaml")):
        """Loads metadata from a YAML file, returning strides and names if the file exists, otherwise `None`."""
        if f.exists():
            d = yaml_load(f)
            return d["stride"], d["names"]  # assign stride, names
        return None, None


class AutoShape(nn.Module):
    """AutoShape class for robust YOLOv5 inference with preprocessing, NMS, and support for various input formats."""

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        """Initializes YOLOv5 model for inference, setting up attributes and preparing model for evaluation."""
        super().__init__()
        if verbose:
            LOGGER.info("Adding AutoShape... ")
        copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values

    def _apply(self, fn):
        """Applies to(), cpu(), cuda(), half() etc.

        to model tensors excluding parameters or registered buffers.
        """
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        """Performs inference on inputs with optional augment & profiling.

        Supports various formats including file, URI, OpenCV, PIL, numpy, torch.
        """
        # For size(height=640, width=1280), RGB images example inputs are:
        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != "cpu")  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f"image{i}"  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, "filename", f) or f
                files.append(Path(f).with_suffix(".jpg").name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # image shape
                g = max(size) / max(s)  # gain
                shape1.append([int(y * g) for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(
                    y if self.dmb else y[0],
                    self.conf,
                    self.iou,
                    self.classes,
                    self.agnostic,
                    self.multi_label,
                    max_det=self.max_det,
                )  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)


class Detections:
    """Manages YOLOv5 detection results with methods for visualization, saving, cropping, and exporting detections."""

    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        """Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization."""
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1e3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
        """Executes model predictions, displaying and/or saving outputs with optional crops and labels."""
        s, crops = "", []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} "  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(", ")
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f"{self.names[int(cls)]} {conf:.2f}"
                        if crop:
                            file = save_dir / "crops" / self.names[int(cls)] / self.files[i] if save else None
                            crops.append(
                                {
                                    "box": box,
                                    "conf": conf,
                                    "cls": cls,
                                    "label": label,
                                    "im": save_one_box(box, im, file=file, save=save),
                                }
                            )
                        else:  # all others
                            annotator.box_label(box, label if labels else "", color=colors(cls))
                    im = annotator.im
            else:
                s += "(no detections)"

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                if is_jupyter():
                    from IPython.display import display

                    display(im)
                else:
                    im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip("\n")
            return f"{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}" % self.t
        if crop:
            if save:
                LOGGER.info(f"Saved results to {save_dir}\n")
            return crops

    @TryExcept("Showing images is not supported in this environment")
    def show(self, labels=True):
        """Displays detection results with optional labels.

        Usage: show(labels=True)
        """
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
        """Saves detection results with optional labels to a specified directory.

        Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
        """Crops detection results, optionally saves them to a directory.

        Args: save (bool), save_dir (str), exist_ok (bool).
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        """Renders detection results with optional labels on images; args: labels (bool) indicating label inclusion."""
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        """Returns detections as pandas DataFrames for various box formats (xyxy, xyxyn, xywh, xywhn).

        Example: print(results.pandas().xyxy[0]).
        """
        new = copy(self)  # return copy
        ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name"  # xyxy columns
        cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name"  # xywh columns
        for k, c in zip(["xyxy", "xyxyn", "xywh", "xywhn"], [ca, ca, cb, cb]):
            a = [[[*x[:5], int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Converts a Detections object into a list of individual detection results for iteration.

        Example: for result in results.tolist():
        """
        r = range(self.n)  # iterable
        return [
            Detections(
                [self.ims[i]],
                [self.pred[i]],
                [self.files[i]],
                self.times,
                self.names,
                self.s,
            )
            for i in r
        ]

    def print(self):
        """Logs the string representation of the current object's state via the LOGGER."""
        LOGGER.info(self.__str__())

    def __len__(self):
        """Returns the number of results stored, overrides the default len(results)."""
        return self.n

    def __str__(self):
        """Returns a string representation of the model's results, suitable for printing, overrides default
        print(results).
        """
        return self._run(pprint=True)  # print results

    def __repr__(self):
        """Returns a string representation of the YOLOv5 object, including its class and formatted results."""
        return f"YOLOv5 {self.__class__} instance\n" + self.__str__()


class Proto(nn.Module):
    """YOLOv5 mask Proto module for segmentation models, performing convolutions and upsampling on input tensors."""

    def __init__(self, c1, c_=256, c2=32):
        """Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration."""
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Performs a forward pass using convolutional layers and upsampling on input tensor `x`."""
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))


class Classify(nn.Module):
    """YOLOv5 classification head with convolution, pooling, and dropout layers for channel transformation."""

    def __init__(
        self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
    ):  # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
        """Initializes YOLOv5 classification head with convolution, pooling, and dropout layers for input to output
        channel transformation.
        """
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=dropout_p, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        """Processes input through conv, pool, drop, and linear layers; supports list concatenation input."""
        if isinstance(x, list):
            x = torch.cat(x, 1)
        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
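As a quick orientation for the classes above, here is a minimal usage sketch. It is not part of the commit; it assumes this repo layout and the yolov5s.pt weight that the Dockerfile downloads at build time.

import torch

from models.common import AutoShape, DetectMultiBackend

backend = DetectMultiBackend("yolov5s.pt", device=torch.device("cpu"))
model = AutoShape(backend)  # wraps letterbox pre-processing and NMS around the backend
results = model("https://ultralytics.com/images/zidane.jpg", size=640)  # any input format listed in forward()
results.print()  # per-image summary string built by Detections._run(pprint=True)
print(results.pandas().xyxy[0])  # xmin/ymin/xmax/ymax/confidence/class/name DataFrame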
models/experimental.py
ADDED
@@ -0,0 +1,130 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Experimental modules."""

import math

import numpy as np
import torch
import torch.nn as nn
from ultralytics.utils.patches import torch_load

from utils.downloads import attempt_download


class Sum(nn.Module):
    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070."""

    def __init__(self, n, weight=False):
        """Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting, supporting 2+
        inputs.
        """
        super().__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        """Processes input through a customizable weighted sum of `n` inputs, optionally applying learned weights."""
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y


class MixConv2d(nn.Module):
    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595."""

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Initializes MixConv2d with mixed depth-wise convolutional layers, taking input and output channels (c1, c2),
        kernel sizes (k), stride (s), and channel distribution strategy (equal_ch).
        """
        super().__init__()
        n = len(k)  # number of convolutions
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, n - 1e-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(n)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * n
            a = np.eye(n + 1, n, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList(
            [nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]
        )
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()

    def forward(self, x):
        """Performs forward pass by applying SiLU activation on batch-normalized concatenated convolutional layer
        outputs.
        """
        return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


class Ensemble(nn.ModuleList):
    """Ensemble of models."""

    def __init__(self):
        """Initializes an ensemble of models to be used for aggregated predictions."""
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Performs forward pass aggregating outputs from an ensemble of models."""
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output


def attempt_load(weights, device=None, inplace=True, fuse=True):
    """Loads and fuses an ensemble or single YOLOv5 model from weights, handling device placement and model adjustments.

    Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
    """
    from models.yolo import Detect, Model

    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        ckpt = torch_load(attempt_download(w), map_location="cpu")  # load
        ckpt = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model

        # Model compatibility updates
        if not hasattr(ckpt, "stride"):
            ckpt.stride = torch.tensor([32.0])
        if hasattr(ckpt, "names") and isinstance(ckpt.names, (list, tuple)):
            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict

        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, "fuse") else ckpt.eval())  # model in eval mode

    # Module updates
    for m in model.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
            m.inplace = inplace
            if t is Detect and not isinstance(m.anchor_grid, list):
                delattr(m, "anchor_grid")
                setattr(m, "anchor_grid", [torch.zeros(1)] * m.nl)
        elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
    if len(model) == 1:
        return model[-1]

    # Return detection ensemble
    print(f"Ensemble created with {weights}\n")
    for k in "names", "nc", "yaml":
        setattr(model, k, getattr(model[0], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f"Models have different class counts: {[m.nc for m in model]}"
    return model
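A minimal sketch of attempt_load() in use (assumes the weight files exist locally or can be fetched by attempt_download; not part of the commit):

import torch

from models.experimental import attempt_load

model = attempt_load("yolov5s.pt", device=torch.device("cpu"), fuse=True)  # single model
# model = attempt_load(["yolov5s.pt", "yolov5m.pt"], device=torch.device("cpu"))  # returns an Ensemble
pred = model(torch.zeros(1, 3, 640, 640))[0]  # raw predictions, before non_max_suppression
print(pred.shape)  # e.g. torch.Size([1, 25200, 85]) for an 80-class model at 640x640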
models/hub/anchors.yaml
ADDED
@@ -0,0 +1,57 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Default anchors for COCO data

# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
  - [9, 11, 21, 19, 17, 41] # P3/8
  - [43, 32, 39, 70, 86, 64] # P4/16
  - [65, 131, 134, 130, 120, 265] # P5/32
  - [282, 180, 247, 354, 512, 387] # P6/64

# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
  - [19, 27, 44, 40, 38, 94] # P3/8
  - [96, 68, 86, 152, 180, 137] # P4/16
  - [140, 301, 303, 264, 238, 542] # P5/32
  - [436, 615, 739, 380, 925, 792] # P6/64

# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
  - [28, 41, 67, 59, 57, 141] # P3/8
  - [144, 103, 129, 227, 270, 205] # P4/16
  - [209, 452, 455, 396, 358, 812] # P5/32
  - [653, 922, 1109, 570, 1387, 1187] # P6/64

# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
  - [11, 11, 13, 30, 29, 20] # P3/8
  - [30, 46, 61, 38, 39, 92] # P4/16
  - [78, 80, 146, 66, 79, 163] # P5/32
  - [149, 150, 321, 143, 157, 303] # P6/64
  - [257, 402, 359, 290, 524, 372] # P7/128

# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
  - [19, 22, 54, 36, 32, 77] # P3/8
  - [70, 83, 138, 71, 75, 173] # P4/16
  - [165, 159, 148, 334, 375, 151] # P5/32
  - [334, 317, 251, 626, 499, 474] # P6/64
  - [750, 326, 534, 814, 1079, 818] # P7/128

# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
  - [29, 34, 81, 55, 47, 115] # P3/8
  - [105, 124, 207, 107, 113, 259] # P4/16
  - [247, 238, 222, 500, 563, 227] # P5/32
  - [501, 476, 376, 939, 749, 711] # P6/64
  - [1126, 489, 801, 1222, 1618, 1227] # P7/128
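Each anchors_* entry above is one list per detection level, with three (width, height) anchor pairs flattened into six integers at the stated training image size. A small sketch to sanity-check that structure (assumes PyYAML, which the copied YOLOv5 requirements.txt installs; not part of the commit):

import yaml

with open("models/hub/anchors.yaml") as f:
    anchor_sets = yaml.safe_load(f)

for name, levels in anchor_sets.items():
    assert all(len(level) == 6 for level in levels), name  # 3 (w, h) pairs per level
    print(f"{name}: {len(levels)} output levels")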
models/hub/yolov3-spp.yaml
ADDED
@@ -0,0 +1,52 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [32, 3, 1]], # 0
    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
    [-1, 1, Bottleneck, [64]],
    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
    [-1, 2, Bottleneck, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
    [-1, 8, Bottleneck, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
    [-1, 8, Bottleneck, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
    [-1, 4, Bottleneck, [1024]], # 10
  ]

# YOLOv3-SPP head
head: [
    [-1, 1, Bottleneck, [1024, False]],
    [-1, 1, SPP, [512, [5, 9, 13]]],
    [-1, 1, Conv, [1024, 3, 1]],
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)

    [-2, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P4
    [-1, 1, Bottleneck, [512, False]],
    [-1, 1, Bottleneck, [512, False]],
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)

    [-2, 1, Conv, [128, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P3
    [-1, 1, Bottleneck, [256, False]],
    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)

    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
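These hub YAMLs are consumed by models/yolo.py, which parses the backbone and head rows of [from, number, module, args] into a model. A minimal sketch of that path (assumes this repo's models/yolo.py; weights are randomly initialized, not pretrained; not part of the commit):

import torch

from models.yolo import Model

model = Model(cfg="models/hub/yolov3-spp.yaml", ch=3, nc=80).eval()
pred = model(torch.zeros(1, 3, 640, 640))[0]  # inference output, before NMS
print(pred.shape)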
models/hub/yolov3-tiny.yaml
ADDED
@@ -0,0 +1,42 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 14, 23, 27, 37, 58] # P4/16
  - [81, 82, 135, 169, 344, 319] # P5/32

# YOLOv3-tiny backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [16, 3, 1]], # 0
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
    [-1, 1, Conv, [32, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
    [-1, 1, Conv, [64, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
    [-1, 1, Conv, [128, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
    [-1, 1, Conv, [256, 3, 1]],
    [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
    [-1, 1, Conv, [512, 3, 1]],
    [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
    [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
  ]

# YOLOv3-tiny head
head: [
    [-1, 1, Conv, [1024, 3, 1]],
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)

    [-2, 1, Conv, [128, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P4
    [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)

    [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
  ]
models/hub/yolov3.yaml
ADDED
@@ -0,0 +1,52 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# darknet53 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [32, 3, 1]], # 0
    [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
    [-1, 1, Bottleneck, [64]],
    [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
    [-1, 2, Bottleneck, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
    [-1, 8, Bottleneck, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
    [-1, 8, Bottleneck, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
    [-1, 4, Bottleneck, [1024]], # 10
  ]

# YOLOv3 head
head: [
    [-1, 1, Bottleneck, [1024, False]],
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, Conv, [1024, 3, 1]],
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)

    [-2, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P4
    [-1, 1, Bottleneck, [512, False]],
    [-1, 1, Bottleneck, [512, False]],
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)

    [-2, 1, Conv, [128, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P3
    [-1, 1, Bottleneck, [256, False]],
    [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)

    [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/hub/yolov5-bifpn.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 BiFPN head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14, 6], 1, Concat, [1]], # cat P4 <--- BiFPN change
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/hub/yolov5-fpn.yaml
ADDED
@@ -0,0 +1,43 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 FPN head
head: [
    [-1, 3, C3, [1024, False]], # 10 (P5/32-large)

    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 3, C3, [512, False]], # 14 (P4/16-medium)

    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 1, Conv, [256, 1, 1]],
    [-1, 3, C3, [256, False]], # 18 (P3/8-small)

    [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/hub/yolov5-p2.yaml
ADDED
@@ -0,0 +1,55 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head with (P2, P3, P4, P5) outputs
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [128, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 2], 1, Concat, [1]], # cat backbone P2
    [-1, 1, C3, [128, False]], # 21 (P2/4-xsmall)

    [-1, 1, Conv, [128, 3, 2]],
    [[-1, 18], 1, Concat, [1]], # cat head P3
    [-1, 3, C3, [256, False]], # 24 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 27 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 30 (P5/32-large)

    [[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5)
  ]
models/hub/yolov5-p34.yaml
ADDED
@@ -0,0 +1,42 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head with (P3, P4) outputs
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [[17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4)
  ]
models/hub/yolov5-p6.yaml
ADDED
@@ -0,0 +1,57 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
    [-1, 3, C3, [768]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 11
  ]

# YOLOv5 v6.0 head with (P3, P4, P5, P6) outputs
head: [
    [-1, 1, Conv, [768, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P5
    [-1, 3, C3, [768, False]], # 15

    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 19

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 23 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 20], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 16], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [768, False]], # 29 (P5/32-large)

    [-1, 1, Conv, [768, 3, 2]],
    [[-1, 12], 1, Concat, [1]], # cat head P6
    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)

    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
  ]
models/hub/yolov5-p7.yaml
ADDED
@@ -0,0 +1,68 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors: 3 # AutoAnchor evolves 3 anchors per P output layer

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
    [-1, 3, C3, [768]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
    [-1, 3, C3, [1024]],
    [-1, 1, Conv, [1280, 3, 2]], # 11-P7/128
    [-1, 3, C3, [1280]],
    [-1, 1, SPPF, [1280, 5]], # 13
  ]

# YOLOv5 v6.0 head with (P3, P4, P5, P6, P7) outputs
head: [
    [-1, 1, Conv, [1024, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 10], 1, Concat, [1]], # cat backbone P6
    [-1, 3, C3, [1024, False]], # 17

    [-1, 1, Conv, [768, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P5
    [-1, 3, C3, [768, False]], # 21

    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 25

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 29 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 26], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 32 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 22], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [768, False]], # 35 (P5/32-large)

    [-1, 1, Conv, [768, 3, 2]],
    [[-1, 18], 1, Concat, [1]], # cat head P6
    [-1, 3, C3, [1024, False]], # 38 (P6/64-xlarge)

    [-1, 1, Conv, [1024, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P7
    [-1, 3, C3, [1280, False]], # 41 (P7/128-xxlarge)

    [[29, 32, 35, 38, 41], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6, P7)
  ]
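The P3-P7 names encode the feature-map stride (8 through 128), which is what comments like "# P7/128" refer to. A one-liner to make the grid sizes concrete (plain arithmetic, not repo code):

for level, stride in {"P3": 8, "P4": 16, "P5": 32, "P6": 64, "P7": 128}.items():
    print(level, 1280 // stride, "cells per side at a 1280-pixel input")  # 160, 80, 40, 20, 10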
models/hub/yolov5-panet.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 PANet head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/hub/yolov5l6.yaml
ADDED
@@ -0,0 +1,61 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [19, 27, 44, 40, 38, 94] # P3/8
  - [96, 68, 86, 152, 180, 137] # P4/16
  - [140, 301, 303, 264, 238, 542] # P5/32
  - [436, 615, 739, 380, 925, 792] # P6/64

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
    [-1, 3, C3, [768]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 11
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [768, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P5
    [-1, 3, C3, [768, False]], # 15

    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 19

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 23 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 20], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 16], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [768, False]], # 29 (P5/32-large)

    [-1, 1, Conv, [768, 3, 2]],
    [[-1, 12], 1, Concat, [1]], # cat head P6
    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)

    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
  ]
models/hub/yolov5m6.yaml
ADDED
@@ -0,0 +1,61 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
  - [19, 27, 44, 40, 38, 94] # P3/8
  - [96, 68, 86, 152, 180, 137] # P4/16
  - [140, 301, 303, 264, 238, 542] # P5/32
  - [436, 615, 739, 380, 925, 792] # P6/64

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
    [-1, 3, C3, [768]],
    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 11
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [768, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 8], 1, Concat, [1]], # cat backbone P5
    [-1, 3, C3, [768, False]], # 15

    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 19

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 23 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 20], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 16], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [768, False]], # 29 (P5/32-large)

    [-1, 1, Conv, [768, 3, 2]],
    [[-1, 12], 1, Concat, [1]], # cat head P6
    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)

    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
  ]
models/hub/yolov5n6.yaml
ADDED
@@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.25 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
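Each anchor row above lists three (width, height) pixel pairs, and the P6 heads predict at four strides. Quick arithmetic (a sketch, independent of the repo code) for the Detect layer wired as [[23, 26, 29, 32], 1, Detect, [nc, anchors]]:

nc = 80
na = 3                    # anchors per scale: three (w, h) pairs per row above
no = na * (nc + 5)        # channels per grid cell: 3 * (box 4 + objectness 1 + 80 classes)
print(no)                 # 255
for name, stride in [("P3", 8), ("P4", 16), ("P5", 32), ("P6", 64)]:
    print(name, 640 // stride)  # grid side at 640x640 input: 80, 40, 20, 10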
models/hub/yolov5s-LeakyReLU.yaml
ADDED
@@ -0,0 +1,50 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+activation: nn.LeakyReLU(0.1) # <----- Conv() activation used throughout entire YOLOv5 model
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
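The only change from the stock yolov5s config is the activation: key. YOLOv5's model parser consumes it by rebinding the class-level default activation on Conv before any layers are built; roughly (paraphrasing the logic in models/yolo.py, not a verbatim excerpt):

import torch.nn as nn
from models.common import Conv

d = {"activation": "nn.LeakyReLU(0.1)"}  # as parsed from the YAML above
if act := d.get("activation"):
    Conv.default_act = eval(act)  # every Conv() constructed afterwards uses LeakyReLU
print(Conv.default_act)           # LeakyReLU(negative_slope=0.1)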
models/hub/yolov5s-ghost.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, GhostConv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3Ghost, [128]],
+    [-1, 1, GhostConv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3Ghost, [256]],
+    [-1, 1, GhostConv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3Ghost, [512]],
+    [-1, 1, GhostConv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3Ghost, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, GhostConv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3Ghost, [512, False]], # 13
+
+    [-1, 1, GhostConv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3Ghost, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, GhostConv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3Ghost, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, GhostConv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3Ghost, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
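GhostConv/C3Ghost cut FLOPs by generating only half the output channels with a real convolution and deriving the other half from a cheap depthwise pass over that result. A simplified PyTorch sketch of the idea (models/common.py in this repo carries the actual implementation):

import torch
import torch.nn as nn

class GhostConvSketch(nn.Module):
    def __init__(self, c1, c2):
        super().__init__()
        c_ = c2 // 2
        self.primary = nn.Conv2d(c1, c_, 1, bias=False)  # ordinary 1x1 conv, half the channels
        self.cheap = nn.Conv2d(c_, c_, 5, padding=2, groups=c_, bias=False)  # depthwise 5x5, nearly free

    def forward(self, x):
        y = self.primary(x)
        return torch.cat((y, self.cheap(y)), 1)  # half "real" features, half "ghost" features

print(GhostConvSketch(64, 128)(torch.randn(1, 64, 20, 20)).shape)  # torch.Size([1, 128, 20, 20])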
models/hub/yolov5s-transformer.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3TR, [1024]], # 8 <--- C3TR() Transformer module
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
+  ]
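The single substitution here is C3TR at backbone layer 8, which replaces the C3 bottleneck stack with self-attention over flattened spatial positions. A rough, generic sketch of that mechanism (not the exact C3TR/TransformerBlock code):

import torch
import torch.nn as nn

class SpatialAttentionSketch(nn.Module):
    def __init__(self, c, num_heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(c, num_heads, batch_first=True)

    def forward(self, x):  # x: (b, c, h, w)
        b, c, h, w = x.shape
        seq = x.flatten(2).transpose(1, 2)  # (b, h*w, c): one token per spatial position
        out, _ = self.attn(seq, seq, seq)   # global self-attention across the feature map
        return out.transpose(1, 2).view(b, c, h, w)

print(SpatialAttentionSketch(64)(torch.randn(1, 64, 20, 20)).shape)  # torch.Size([1, 64, 20, 20])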
models/hub/yolov5s6.yaml
ADDED
@@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.50 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
models/hub/yolov5x6.yaml
ADDED
@@ -0,0 +1,61 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.33 # model depth multiple
+width_multiple: 1.25 # layer channel multiple
+anchors:
+  - [19, 27, 44, 40, 38, 94] # P3/8
+  - [96, 68, 86, 152, 180, 137] # P4/16
+  - [140, 301, 303, 264, 238, 542] # P5/32
+  - [436, 615, 739, 380, 925, 792] # P6/64
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [768, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [768]],
+    [-1, 1, Conv, [1024, 3, 2]], # 9-P6/64
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 11
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [768, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 8], 1, Concat, [1]], # cat backbone P5
+    [-1, 3, C3, [768, False]], # 15
+
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 19
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 23 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 20], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 26 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 16], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [768, False]], # 29 (P5/32-large)
+
+    [-1, 1, Conv, [768, 3, 2]],
+    [[-1, 12], 1, Concat, [1]], # cat head P6
+    [-1, 3, C3, [1024, False]], # 32 (P6/64-xlarge)
+
+    [[23, 26, 29, 32], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
+  ]
models/segment/yolov5l-seg.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.0 # model depth multiple
+width_multiple: 1.0 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
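These segmentation configs differ from their detection twins only in the last row: Segment takes two extra arguments, 32 mask coefficients (nm) and 256 proto channels (npr). What that does to the head, numerically (a sketch, not repo code):

nc, nm = 80, 32
na = 3                     # anchors per scale
print(na * (5 + nc))       # 255 outputs per cell for a plain Detect head
print(na * (5 + nc + nm))  # 351 for Segment: each anchor also predicts 32 mask coefficients
# A separate proto branch emits prototype masks; per instance, the final mask is
# roughly sigmoid(coefficients @ prototypes), cropped to the predicted box.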
models/segment/yolov5m-seg.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.67 # model depth multiple
+width_multiple: 0.75 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
models/segment/yolov5n-seg.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
models/segment/yolov5s-seg.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 0.33 # model depth multiple
+width_multiple: 0.5 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
models/segment/yolov5x-seg.yaml
ADDED
@@ -0,0 +1,49 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+# Parameters
+nc: 80 # number of classes
+depth_multiple: 1.33 # model depth multiple
+width_multiple: 1.25 # layer channel multiple
+anchors:
+  - [10, 13, 16, 30, 33, 23] # P3/8
+  - [30, 61, 62, 45, 59, 119] # P4/16
+  - [116, 90, 156, 198, 373, 326] # P5/32
+
+# YOLOv5 v6.0 backbone
+backbone:
+  # [from, number, module, args]
+  [
+    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
+    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
+    [-1, 6, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
+    [-1, 3, C3, [1024]],
+    [-1, 1, SPPF, [1024, 5]], # 9
+  ]
+
+# YOLOv5 v6.0 head
+head: [
+    [-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 6], 1, Concat, [1]], # cat backbone P4
+    [-1, 3, C3, [512, False]], # 13
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
+    [[-1, 4], 1, Concat, [1]], # cat backbone P3
+    [-1, 3, C3, [256, False]], # 17 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 14], 1, Concat, [1]], # cat head P4
+    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 10], 1, Concat, [1]], # cat head P5
+    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
+
+    [[17, 20, 23], 1, Segment, [nc, anchors, 32, 256]], # Segment(P3, P4, P5)
+  ]
models/tf.py
ADDED
@@ -0,0 +1,775 @@
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+"""
+TensorFlow, Keras and TFLite versions of YOLOv5
+Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127.
+
+Usage:
+    $ python models/tf.py --weights yolov5s.pt
+
+Export:
+    $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
+"""
+
+import argparse
+import sys
+from copy import deepcopy
+from pathlib import Path
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[1]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+# ROOT = ROOT.relative_to(Path.cwd())  # relative
+
+import numpy as np
+import tensorflow as tf
+import torch
+import torch.nn as nn
+from tensorflow import keras
+
+from models.common import (
+    C3,
+    SPP,
+    SPPF,
+    Bottleneck,
+    BottleneckCSP,
+    C3x,
+    Concat,
+    Conv,
+    CrossConv,
+    DWConv,
+    DWConvTranspose2d,
+    Focus,
+    autopad,
+)
+from models.experimental import MixConv2d, attempt_load
+from models.yolo import Detect, Segment
+from utils.activations import SiLU
+from utils.general import LOGGER, make_divisible, print_args
+
+
+class TFBN(keras.layers.Layer):
+    """TensorFlow BatchNormalization wrapper for initializing with optional pretrained weights."""
+
+    def __init__(self, w=None):
+        """Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
+        super().__init__()
+        self.bn = keras.layers.BatchNormalization(
+            beta_initializer=keras.initializers.Constant(w.bias.numpy()),
+            gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
+            moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
+            moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
+            epsilon=w.eps,
+        )
+
+    def call(self, inputs):
+        """Applies batch normalization to the inputs."""
+        return self.bn(inputs)
+
+
+class TFPad(keras.layers.Layer):
+    """Pads input tensors in spatial dimensions 1 and 2 with specified integer or tuple padding values."""
+
+    def __init__(self, pad):
+        """Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and
+        tuple inputs.
+
+        Inputs are zero-padded symmetrically on both sides of spatial dimensions 1 and 2.
+        """
+        super().__init__()
+        if isinstance(pad, int):
+            self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
+        else:  # tuple/list
+            self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
+
+    def call(self, inputs):
+        """Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
+        return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
+
+
+class TFConv(keras.layers.Layer):
+    """Implements a standard convolutional layer with optional batch normalization and activation for TensorFlow."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        """Initializes a standard convolution layer with optional batch normalization and activation; supports only
+        group=1.
+
+        Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
+        # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
+        conv = keras.layers.Conv2D(
+            filters=c2,
+            kernel_size=k,
+            strides=s,
+            padding="SAME" if s == 1 else "VALID",
+            use_bias=not hasattr(w, "bn"),
+            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
+        )
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        """Applies convolution, batch normalization, and activation function to input tensors."""
+        return self.act(self.bn(self.conv(inputs)))
+
+
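The padding comment inside TFConv.__init__ above is the subtlest part of this port: Keras "SAME" pads asymmetrically (extra on the bottom/right) while PyTorch's Conv2d pads symmetrically, so for stride-2 kernels the two disagree unless we pad explicitly and convolve with "VALID", which is exactly what the TFPad + keras.Sequential combination does. A standalone sketch (not part of tf.py) of that workaround:

import tensorflow as tf

x = tf.random.uniform((1, 6, 6, 1))                   # NHWC input
padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])  # symmetric pad of 1, as TFPad does for k=3
conv = tf.keras.layers.Conv2D(1, 3, strides=2, padding="VALID")
print(conv(padded).shape)  # (1, 3, 3, 1) -- matches PyTorch Conv2d(k=3, s=2, padding=1)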
+class TFDWConv(keras.layers.Layer):
+    """Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
+        """Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
+        models.
+
+        Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
+        conv = keras.layers.DepthwiseConv2D(
+            kernel_size=k,
+            depth_multiplier=c2 // c1,
+            strides=s,
+            padding="SAME" if s == 1 else "VALID",
+            use_bias=not hasattr(w, "bn"),
+            depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer="zeros" if hasattr(w, "bn") else keras.initializers.Constant(w.conv.bias.numpy()),
+        )
+        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
+        self.bn = TFBN(w.bn) if hasattr(w, "bn") else tf.identity
+        self.act = activations(w.act) if act else tf.identity
+
+    def call(self, inputs):
+        """Applies convolution, batch normalization, and activation function to input tensors."""
+        return self.act(self.bn(self.conv(inputs)))
+
+
+class TFDWConvTranspose2d(keras.layers.Layer):
+    """Implements a depthwise ConvTranspose2D layer for TensorFlow with specific settings."""
+
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
+        """Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
+
+        Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        assert c1 == c2, f"TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels"
+        assert k == 4 and p1 == 1, "TFDWConvTranspose2d() only valid for k=4 and p1=1"
+        weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
+        self.c1 = c1
+        self.conv = [
+            keras.layers.Conv2DTranspose(
+                filters=1,
+                kernel_size=k,
+                strides=s,
+                padding="VALID",
+                output_padding=p2,
+                use_bias=True,
+                kernel_initializer=keras.initializers.Constant(weight[..., i : i + 1]),
+                bias_initializer=keras.initializers.Constant(bias[i]),
+            )
+            for i in range(c1)
+        ]
+
+    def call(self, inputs):
+        """Processes input through parallel convolutions and concatenates results, trimming border pixels."""
+        return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
+
+
+class TFFocus(keras.layers.Layer):
+    """Focuses spatial information into channel space using pixel shuffling and convolution for TensorFlow models."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
+        """Initializes TFFocus layer to focus width and height information into channel space with custom convolution
+        parameters.
+
+        Inputs are ch_in, ch_out, kernel, stride, padding, groups.
+        """
+        super().__init__()
+        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
+
+    def call(self, inputs):
+        """Performs pixel shuffling and convolution on input tensor, downsampling by 2 and expanding channels by 4.
+
+        Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
+        """
+        inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
+        return self.conv(tf.concat(inputs, 3))
+
+
+class TFBottleneck(keras.layers.Layer):
+    """Implements a TensorFlow bottleneck layer with optional shortcut connections for efficient feature extraction."""
+
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
+        """Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with
+        optional shortcut.
+
+        Arguments are ch_in, ch_out, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        """Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
+        result.
+        """
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFCrossConv(keras.layers.Layer):
+    """Implements a cross convolutional layer with optional expansion, grouping, and shortcut for TensorFlow."""
+
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
+        """Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
+        self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
+        self.add = shortcut and c1 == c2
+
+    def call(self, inputs):
+        """Passes input through two convolutions optionally adding the input if channel dimensions match."""
+        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
+
+
+class TFConv2d(keras.layers.Layer):
+    """Implements a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D for specified filters and stride."""
+
+    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
+        """Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
+        sizes and stride.
+        """
+        super().__init__()
+        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
+        self.conv = keras.layers.Conv2D(
+            filters=c2,
+            kernel_size=k,
+            strides=s,
+            padding="VALID",
+            use_bias=bias,
+            kernel_initializer=keras.initializers.Constant(w.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None,
+        )
+
+    def call(self, inputs):
+        """Applies a convolution operation to the inputs and returns the result."""
+        return self.conv(inputs)
+
+
+class TFBottleneckCSP(keras.layers.Layer):
+    """Implements a CSP bottleneck layer for TensorFlow models to enhance gradient flow and efficiency."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
+        ratio.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
+        self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
+        self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
+        self.bn = TFBN(w.bn)
+        self.act = lambda x: keras.activations.swish(x)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        """Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
+        dimensions.
+        """
+        y1 = self.cv3(self.m(self.cv1(inputs)))
+        y2 = self.cv2(inputs)
+        return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
+
+
+class TFC3(keras.layers.Layer):
+    """CSP bottleneck layer with 3 convolutions for TensorFlow, supporting optional shortcuts and group convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
+
+    def call(self, inputs):
+        """Processes input through a sequence of transformations for object detection (YOLOv5).
+
+        See https://github.com/ultralytics/yolov5.
+        """
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFC3x(keras.layers.Layer):
+    """A TensorFlow layer for enhanced feature extraction using cross-convolutions in object detection models."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
+        """Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
+
+        Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
+        """
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
+        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
+        self.m = keras.Sequential(
+            [TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]
+        )
+
+    def call(self, inputs):
+        """Processes input through cascaded convolutions and merges features, returning the final tensor output."""
+        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
+
+
+class TFSPP(keras.layers.Layer):
+    """Implements spatial pyramid pooling for YOLOv3-SPP with specific channels and kernel sizes."""
+
+    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
+        """Initializes a YOLOv3-SPP layer with specific input/output channels and kernel sizes for pooling."""
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
+        self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding="SAME") for x in k]
+
+    def call(self, inputs):
+        """Processes input through two TFConv layers and concatenates with max-pooled outputs at intermediate stage."""
+        x = self.cv1(inputs)
+        return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
+
+
+class TFSPPF(keras.layers.Layer):
+    """Implements a fast spatial pyramid pooling layer for TensorFlow with optimized feature extraction."""
+
+    def __init__(self, c1, c2, k=5, w=None):
+        """Initialize a fast spatial pyramid pooling layer with customizable channels, kernel size, and weights."""
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
+        self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
+        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
+
+    def call(self, inputs):
+        """Executes the model's forward pass, concatenating input features with three max-pooled versions before final
+        convolution.
+        """
+        x = self.cv1(inputs)
+        y1 = self.m(x)
+        y2 = self.m(y1)
+        return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))
+
+
+class TFDetect(keras.layers.Layer):
+    """Implements YOLOv5 object detection layer in TensorFlow for predicting bounding boxes and class probabilities."""
+
+    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
+        """Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
+        size.
+        """
+        super().__init__()
+        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [tf.zeros(1)] * self.nl  # init grid
+        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
+        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
+        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
+        self.training = False  # set to False after building model
+        self.imgsz = imgsz
+        for i in range(self.nl):
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            self.grid[i] = self._make_grid(nx, ny)
+
+    def call(self, inputs):
+        """Performs forward pass through the model layers to predict object bounding boxes and classifications."""
+        z = []  # inference output
+        x = []
+        for i in range(self.nl):
+            x.append(self.m[i](inputs[i]))
+            # x(bs,20,20,255) to x(bs,3,20,20,85)
+            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
+            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])
+
+            if not self.training:  # inference
+                y = x[i]
+                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
+                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
+                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
+                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
+                # Normalize xywh to 0-1 to reduce calibration error
+                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
+                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4 : 5 + self.nc]), y[..., 5 + self.nc :]], -1)
+                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))
+
+        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)
+
+    @staticmethod
+    def _make_grid(nx=20, ny=20):
+        """Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
+        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
+        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
+
+
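A quick standalone check of the cell-offset layout _make_grid builds, which TFDetect.call then shifts by -0.5 and scales by the stride to decode box centers:

import tensorflow as tf

xv, yv = tf.meshgrid(tf.range(3), tf.range(2))  # toy 3x2 grid instead of 20x20
grid = tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, 2 * 3, 2]), tf.float32)
print(grid.numpy().reshape(-1, 2))
# [[0. 0.] [1. 0.] [2. 0.] [0. 1.] [1. 1.] [2. 1.]] -- row-major (x, y) offsets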
+class TFSegment(TFDetect):
+    """YOLOv5 segmentation head for TensorFlow, combining detection and segmentation."""
+
+    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
+        """Initializes YOLOv5 Segment head with specified channel depths, anchors, and input size for segmentation
+        models.
+        """
+        super().__init__(nc, anchors, ch, imgsz, w)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.no = 5 + nc + self.nm  # number of outputs per anchor
+        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]  # output conv
+        self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
+        self.detect = TFDetect.call
+
+    def call(self, x):
+        """Applies detection and proto layers on input, returning detections and optionally protos if training."""
+        p = self.proto(x[0])
+        # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0]))  # (optional) full-size protos
+        p = tf.transpose(p, [0, 3, 1, 2])  # from shape(1,160,160,32) to shape(1,32,160,160)
+        x = self.detect(self, x)
+        return (x, p) if self.training else (x[0], p)
+
+
+class TFProto(keras.layers.Layer):
+    """Implements convolutional and upsampling layers for feature extraction in YOLOv5 segmentation."""
+
+    def __init__(self, c1, c_=256, c2=32, w=None):
+        """Initialize TFProto layer with convolutional and upsampling for feature extraction and transformation."""
+        super().__init__()
+        self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
+        self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
+        self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
+        self.cv3 = TFConv(c_, c2, w=w.cv3)
+
+    def call(self, inputs):
+        """Performs forward pass through the model, applying convolutions and upscaling on input tensor."""
+        return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
+
+
+class TFUpsample(keras.layers.Layer):
+    """Implements a TensorFlow upsampling layer with specified size, scale factor, and interpolation mode."""
+
+    def __init__(self, size, scale_factor, mode, w=None):
+        """Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor
+        is even.
+
+        Warning: all arguments needed including 'w'
+        """
+        super().__init__()
+        assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"
+        self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
+        # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
+        # with default arguments: align_corners=False, half_pixel_centers=False
+        # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
+        #                                                            size=(x.shape[1] * 2, x.shape[2] * 2))
+
+    def call(self, inputs):
+        """Applies upsample operation to inputs using nearest neighbor interpolation."""
+        return self.upsample(inputs)
+
+
+class TFConcat(keras.layers.Layer):
+    """Implements TensorFlow's version of torch.concat() for concatenating tensors along the last dimension."""
+
+    def __init__(self, dimension=1, w=None):
+        """Initializes a TensorFlow layer for NCHW to NHWC concatenation, requiring dimension=1."""
+        super().__init__()
+        assert dimension == 1, "convert only NCHW to NHWC concat"
+        self.d = 3
+
+    def call(self, inputs):
+        """Concatenates a list of tensors along the last dimension, used for NCHW to NHWC conversion."""
+        return tf.concat(inputs, self.d)
+
+
+def parse_model(d, ch, model, imgsz):
+    """Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments."""
+    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
+    anchors, nc, gd, gw, ch_mul = (
+        d["anchors"],
+        d["nc"],
+        d["depth_multiple"],
+        d["width_multiple"],
+        d.get("channel_multiple"),
+    )
+    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+    if not ch_mul:
+        ch_mul = 8
+
+    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
+        m_str = m
+        m = eval(m) if isinstance(m, str) else m  # eval strings
+        for j, a in enumerate(args):
+            try:
+                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+            except NameError:
+                pass
+
+        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in [
+            nn.Conv2d,
+            Conv,
+            DWConv,
+            DWConvTranspose2d,
+            Bottleneck,
+            SPP,
+            SPPF,
+            MixConv2d,
+            Focus,
+            CrossConv,
+            BottleneckCSP,
+            C3,
+            C3x,
+        ]:
+            c1, c2 = ch[f], args[0]
+            c2 = make_divisible(c2 * gw, ch_mul) if c2 != no else c2
+
+            args = [c1, c2, *args[1:]]
+            if m in [BottleneckCSP, C3, C3x]:
+                args.insert(2, n)
+                n = 1
+        elif m is nn.BatchNorm2d:
+            args = [ch[f]]
+        elif m is Concat:
+            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+        elif m in [Detect, Segment]:
+            args.append([ch[x + 1] for x in f])
+            if isinstance(args[1], int):  # number of anchors
+                args[1] = [list(range(args[1] * 2))] * len(f)
+            if m is Segment:
+                args[3] = make_divisible(args[3] * gw, ch_mul)
+            args.append(imgsz)
+        else:
+            c2 = ch[f]
+
+        tf_m = eval("TF" + m_str.replace("nn.", ""))
+        m_ = (
+            keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)])
+            if n > 1
+            else tf_m(*args, w=model.model[i])
+        )  # module
+
+        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+        t = str(m)[8:-2].replace("__main__.", "")  # module type
+        np = sum(x.numel() for x in torch_m_.parameters())  # number params
+        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+        LOGGER.info(f"{i:>3}{f!s:>18}{n!s:>3}{np:>10} {t:<40}{args!s:<30}")  # print
+        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+        layers.append(m_)
+        ch.append(c2)
+    return keras.Sequential(layers), sorted(save)
+
+
| 587 |
+
class TFModel:
|
| 588 |
+
"""Implements YOLOv5 model in TensorFlow, supporting TensorFlow, Keras, and TFLite formats for object detection."""
|
| 589 |
+
|
| 590 |
+
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
|
| 591 |
+
"""Initialize TF YOLOv5 model with specified channels, classes, model instance, and input size."""
|
| 592 |
+
super().__init__()
|
| 593 |
+
if isinstance(cfg, dict):
|
| 594 |
+
self.yaml = cfg # model dict
|
| 595 |
+
else: # is *.yaml
|
| 596 |
+
import yaml # for torch hub
|
| 597 |
+
|
| 598 |
+
self.yaml_file = Path(cfg).name
|
| 599 |
+
with open(cfg) as f:
|
| 600 |
+
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
| 601 |
+
|
| 602 |
+
# Define model
|
| 603 |
+
if nc and nc != self.yaml["nc"]:
|
| 604 |
+
LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
|
| 605 |
+
self.yaml["nc"] = nc # override yaml value
|
| 606 |
+
self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)
|
| 607 |
+
|
| 608 |
+
def predict(
|
| 609 |
+
self,
|
| 610 |
+
inputs,
|
| 611 |
+
tf_nms=False,
|
| 612 |
+
agnostic_nms=False,
|
| 613 |
+
topk_per_class=100,
|
| 614 |
+
topk_all=100,
|
| 615 |
+
iou_thres=0.45,
|
| 616 |
+
conf_thres=0.25,
|
| 617 |
+
):
|
| 618 |
+
"""Runs inference on input data, with an option for TensorFlow NMS."""
|
| 619 |
+
y = [] # outputs
|
| 620 |
+
x = inputs
|
| 621 |
+
for m in self.model.layers:
|
| 622 |
+
if m.f != -1: # if not from previous layer
|
| 623 |
+
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
|
| 624 |
+
|
| 625 |
+
x = m(x) # run
|
| 626 |
+
y.append(x if m.i in self.savelist else None) # save output
|
| 627 |
+
|
| 628 |
+
# Add TensorFlow NMS
|
| 629 |
+
if tf_nms:
|
| 630 |
+
boxes = self._xywh2xyxy(x[0][..., :4])
|
| 631 |
+
probs = x[0][:, :, 4:5]
|
| 632 |
+
classes = x[0][:, :, 5:]
|
| 633 |
+
scores = probs * classes
|
| 634 |
+
if agnostic_nms:
|
| 635 |
+
nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
|
| 636 |
+
else:
|
| 637 |
+
boxes = tf.expand_dims(boxes, 2)
|
| 638 |
+
nms = tf.image.combined_non_max_suppression(
|
| 639 |
+
boxes, scores, topk_per_class, topk_all, iou_thres, conf_thres, clip_boxes=False
|
| 640 |
+
)
|
| 641 |
+
return (nms,)
|
| 642 |
+
return x # output [1,6300,85] = [xywh, conf, class0, class1, ...]
|
| 643 |
+
# x = x[0] # [x(1,6300,85), ...] to x(6300,85)
|
| 644 |
+
# xywh = x[..., :4] # x(6300,4) boxes
|
| 645 |
+
# conf = x[..., 4:5] # x(6300,1) confidences
|
| 646 |
+
# cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes
|
| 647 |
+
# return tf.concat([conf, cls, xywh], 1)
|
| 648 |
+
|
| 649 |
+
@staticmethod
|
| 650 |
+
def _xywh2xyxy(xywh):
|
| 651 |
+
"""Convert box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom- right."""
|
| 652 |
+
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
|
| 653 |
+
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
class AgnosticNMS(keras.layers.Layer):
|
| 657 |
+
"""Performs agnostic non-maximum suppression (NMS) on detected objects using IoU and confidence thresholds."""
|
| 658 |
+
|
| 659 |
+
def call(self, input, topk_all, iou_thres, conf_thres):
|
| 660 |
+
"""Performs agnostic NMS on input tensors using given thresholds and top-K selection."""
|
| 661 |
+
return tf.map_fn(
|
| 662 |
+
lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
|
| 663 |
+
input,
|
| 664 |
+
fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
|
| 665 |
+
name="agnostic_nms",
|
| 666 |
+
)
|
| 667 |
+
|
| 668 |
+
@staticmethod
|
| 669 |
+
def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
|
| 670 |
+
"""Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence
|
| 671 |
+
thresholds.
|
| 672 |
+
"""
|
| 673 |
+
boxes, classes, scores = x
|
| 674 |
+
class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
|
| 675 |
+
scores_inp = tf.reduce_max(scores, -1)
|
| 676 |
+
selected_inds = tf.image.non_max_suppression(
|
| 677 |
+
boxes, scores_inp, max_output_size=topk_all, iou_threshold=iou_thres, score_threshold=conf_thres
|
| 678 |
+
)
|
| 679 |
+
selected_boxes = tf.gather(boxes, selected_inds)
|
| 680 |
+
padded_boxes = tf.pad(
|
| 681 |
+
selected_boxes,
|
| 682 |
+
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
|
| 683 |
+
mode="CONSTANT",
|
| 684 |
+
constant_values=0.0,
|
| 685 |
+
)
|
| 686 |
+
selected_scores = tf.gather(scores_inp, selected_inds)
|
| 687 |
+
padded_scores = tf.pad(
|
| 688 |
+
selected_scores,
|
| 689 |
+
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
|
| 690 |
+
mode="CONSTANT",
|
| 691 |
+
constant_values=-1.0,
|
| 692 |
+
)
|
| 693 |
+
selected_classes = tf.gather(class_inds, selected_inds)
|
| 694 |
+
padded_classes = tf.pad(
|
| 695 |
+
selected_classes,
|
| 696 |
+
paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
|
| 697 |
+
mode="CONSTANT",
|
| 698 |
+
constant_values=-1.0,
|
| 699 |
+
)
|
| 700 |
+
valid_detections = tf.shape(selected_inds)[0]
|
| 701 |
+
return padded_boxes, padded_scores, padded_classes, valid_detections
|
| 702 |
+
|
| 703 |
+
|
| 704 |
+
def activations(act=nn.SiLU):
|
| 705 |
+
"""Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish."""
|
| 706 |
+
if isinstance(act, nn.LeakyReLU):
|
| 707 |
+
return lambda x: keras.activations.relu(x, alpha=0.1)
|
| 708 |
+
elif isinstance(act, nn.Hardswish):
|
| 709 |
+
return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
|
| 710 |
+
elif isinstance(act, (nn.SiLU, SiLU)):
|
| 711 |
+
return lambda x: keras.activations.swish(x)
|
| 712 |
+
else:
|
| 713 |
+
raise Exception(f"no matching TensorFlow activation found for PyTorch activation {act}")
|
| 714 |
+
|
| 715 |
+
|
| 716 |
+
def representative_dataset_gen(dataset, ncalib=100):
|
| 717 |
+
"""Generate representative dataset for calibration by yielding transformed numpy arrays from the input dataset."""
|
| 718 |
+
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
|
| 719 |
+
im = np.transpose(img, [1, 2, 0])
|
| 720 |
+
im = np.expand_dims(im, axis=0).astype(np.float32)
|
| 721 |
+
im /= 255
|
| 722 |
+
yield [im]
|
| 723 |
+
if n >= ncalib:
|
| 724 |
+
break
|
| 725 |
+
|
| 726 |
+
|
| 727 |
+
def run(
|
| 728 |
+
weights=ROOT / "yolov5s.pt", # weights path
|
| 729 |
+
imgsz=(640, 640), # inference size h,w
|
| 730 |
+
batch_size=1, # batch size
|
| 731 |
+
dynamic=False, # dynamic batch size
|
| 732 |
+
):
|
| 733 |
+
# PyTorch model
|
| 734 |
+
"""Exports YOLOv5 model from PyTorch to TensorFlow and Keras formats, performing inference for validation."""
|
| 735 |
+
im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image
|
| 736 |
+
model = attempt_load(weights, device=torch.device("cpu"), inplace=True, fuse=False)
|
| 737 |
+
_ = model(im) # inference
|
| 738 |
+
model.info()
|
| 739 |
+
|
| 740 |
+
# TensorFlow model
|
| 741 |
+
im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image
|
| 742 |
+
tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
|
| 743 |
+
_ = tf_model.predict(im) # inference
|
| 744 |
+
|
| 745 |
+
# Keras model
|
| 746 |
+
im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
|
| 747 |
+
keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
|
| 748 |
+
keras_model.summary()
|
| 749 |
+
|
| 750 |
+
LOGGER.info("PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.")
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
def parse_opt():
|
| 754 |
+
"""Parses and returns command-line options for model inference, including weights path, image size, batch size, and
|
| 755 |
+
dynamic batching.
|
| 756 |
+
"""
|
| 757 |
+
parser = argparse.ArgumentParser()
|
| 758 |
+
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
|
| 759 |
+
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
|
| 760 |
+
parser.add_argument("--batch-size", type=int, default=1, help="batch size")
|
| 761 |
+
parser.add_argument("--dynamic", action="store_true", help="dynamic batch size")
|
| 762 |
+
opt = parser.parse_args()
|
| 763 |
+
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
|
| 764 |
+
print_args(vars(opt))
|
| 765 |
+
return opt
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
def main(opt):
|
| 769 |
+
"""Executes the YOLOv5 model run function with parsed command line options."""
|
| 770 |
+
run(**vars(opt))
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
if __name__ == "__main__":
|
| 774 |
+
opt = parse_opt()
|
| 775 |
+
main(opt)
|
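Reviewer note: `representative_dataset_gen` above exists to feed calibration batches to TensorFlow's TFLite converter during int8 quantization. A minimal sketch of that hookup, assuming a Keras model such as the one built by `run()` and a dataset with the `LoadImages`-style tuple layout used above (the `quantize_int8` helper name is hypothetical, not part of this repo):

```python
# Minimal sketch, not the repo's export.py: wire representative_dataset_gen
# into the TFLite converter for int8 calibration.
import tensorflow as tf

def quantize_int8(keras_model, dataset, ncalib=100):  # hypothetical helper
    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8   # fully quantized I/O
    converter.inference_output_type = tf.uint8
    return converter.convert()  # returns the TFLite flatbuffer as bytes
```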
models/yolo.py
ADDED
@@ -0,0 +1,496 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""
YOLO-specific modules.

Usage:
    $ python models/yolo.py --cfg yolov5s.yaml
"""

import argparse
import contextlib
import math
import os
import platform
import sys
from copy import deepcopy
from pathlib import Path

import torch
import torch.nn as nn

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != "Windows":
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.common import (
    C3,
    C3SPP,
    C3TR,
    SPP,
    SPPF,
    Bottleneck,
    BottleneckCSP,
    C3Ghost,
    C3x,
    Classify,
    Concat,
    Contract,
    Conv,
    CrossConv,
    DetectMultiBackend,
    DWConv,
    DWConvTranspose2d,
    Expand,
    Focus,
    GhostBottleneck,
    GhostConv,
    Proto,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.plots import feature_visualization
from utils.torch_utils import (
    fuse_conv_and_bn,
    initialize_weights,
    model_info,
    profile,
    scale_img,
    select_device,
    time_sync,
)

try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


class Detect(nn.Module):
    """YOLOv5 Detect head for processing input tensors and generating detection outputs in object detection models."""

    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
        """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
        """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compatibility
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid


class Segment(Detect):
    """YOLOv5 Segment head for segmentation models, extending Detect with mask and prototype layers."""

    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
        """Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
        super().__init__(nc, anchors, ch, inplace)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
        self.detect = Detect.forward

    def forward(self, x):
        """Processes input through the network, returning detections and prototypes; adjusts output based on
        training/export mode.
        """
        p = self.proto(x[0])
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])


class BaseModel(nn.Module):
    """YOLOv5 base model."""

    def forward(self, x, profile=False, visualize=False):
        """Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
        visualization.
        """
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_once(self, x, profile=False, visualize=False):
        """Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
            if profile:
                self._profile_one_layer(m, x, dt)
            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output
            if visualize:
                feature_visualization(x, m.type, m.i, save_dir=visualize)
        return x

    def _profile_one_layer(self, m, x, dt):
        """Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
        c = m == self.model[-1]  # is final layer, copy input as inplace fix
        o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0  # FLOPs
        t = time_sync()
        for _ in range(10):
            m(x.copy() if c else x)
        dt.append((time_sync() - t) * 100)
        if m == self.model[0]:
            LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s}  module")
        LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f}  {m.type}")
        if c:
            LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")

    def fuse(self):
        """Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
        LOGGER.info("Fusing layers... ")
        for m in self.model.modules():
            if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, "bn")  # remove batchnorm
                m.forward = m.forward_fuse  # update forward
        self.info()
        return self

    def info(self, verbose=False, img_size=640):
        """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
        model_info(self, verbose, img_size)

    def _apply(self, fn):
        """Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
        buffers.
        """
        self = super()._apply(fn)
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self


class DetectionModel(BaseModel):
    """YOLOv5 detection model class for object detection tasks, supporting custom configurations and anchors."""

    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
        """Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub

            self.yaml_file = Path(cfg).name
            with open(cfg, encoding="ascii", errors="ignore") as f:
                self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml["nc"] = nc  # override yaml value
        if anchors:
            LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
            self.yaml["anchors"] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml["nc"])]  # default names
        self.inplace = self.yaml.get("inplace", True)

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment)):

            def _forward(x):
                """Passes the input 'x' through the model and returns the processed output."""
                return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)

            s = 256  # 2x min stride
            m.inplace = self.inplace
            m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))])  # forward
            check_anchor_order(m)
            m.anchors /= m.stride.view(-1, 1, 1)
            self.stride = m.stride
            self._initialize_biases()  # only run once

        # Init weights, biases
        initialize_weights(self)
        self.info()
        LOGGER.info("")

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Performs single-scale or augmented inference and may include profiling or visualization."""
        if augment:
            return self._forward_augment(x)  # augmented inference, None
        return self._forward_once(x, profile, visualize)  # single-scale inference, train

    def _forward_augment(self, x):
        """Performs augmented inference across different scales and flips, returning combined detections."""
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self._forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi = self._descale_pred(yi, fi, si, img_size)
            y.append(yi)
        y = self._clip_augmented(y)  # clip augmented tails
        return torch.cat(y, 1), None  # augmented inference, train

    def _descale_pred(self, p, flips, scale, img_size):
        """De-scales predictions from augmented inference, adjusting for flips and image size."""
        if self.inplace:
            p[..., :4] /= scale  # de-scale
            if flips == 2:
                p[..., 1] = img_size[0] - p[..., 1]  # de-flip ud
            elif flips == 3:
                p[..., 0] = img_size[1] - p[..., 0]  # de-flip lr
        else:
            x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale  # de-scale
            if flips == 2:
                y = img_size[0] - y  # de-flip ud
            elif flips == 3:
                x = img_size[1] - x  # de-flip lr
            p = torch.cat((x, y, wh, p[..., 4:]), -1)
        return p

    def _clip_augmented(self, y):
        """Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
        layer counts.
        """
        nl = self.model[-1].nl  # number of detection layers (P3-P5)
        g = sum(4**x for x in range(nl))  # grid points
        e = 1  # exclude layer count
        i = (y[0].shape[1] // g) * sum(4**x for x in range(e))  # indices
        y[0] = y[0][:, :-i]  # large
        i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e))  # indices
        y[-1] = y[-1][:, i:]  # small
        return y

    def _initialize_biases(self, cf=None):
        """Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).

        For details see https://arxiv.org/abs/1708.02002 section 3.3.
        """
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5 : 5 + m.nc] += (
                math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
            )  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)


Model = DetectionModel  # retain YOLOv5 'Model' class for backwards compatibility


class SegmentationModel(DetectionModel):
    """YOLOv5 segmentation model for object detection and segmentation tasks with configurable parameters."""

    def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
        """Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for
        channels, nc (int) for num classes, anchors (list).
        """
        super().__init__(cfg, ch, nc, anchors)


class ClassificationModel(BaseModel):
    """YOLOv5 classification model for image classification tasks, initialized with a config file or detection model."""

    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
        """Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cutoff`
        index.
        """
        super().__init__()
        self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)

    def _from_detection_model(self, model, nc=1000, cutoff=10):
        """Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
        layer.
        """
        if isinstance(model, DetectMultiBackend):
            model = model.model  # unwrap DetectMultiBackend
        model.model = model.model[:cutoff]  # backbone
        m = model.model[-1]  # last layer
        ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels  # ch into module
        c = Classify(ch, nc)  # Classify()
        c.i, c.f, c.type = m.i, m.f, "models.common.Classify"  # index, from, type
        model.model[-1] = c  # replace
        self.model = model.model
        self.stride = model.stride
        self.save = []
        self.nc = nc

    def _from_yaml(self, cfg):
        """Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
        self.model = None


def parse_model(d, ch):
    """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act, ch_mul = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
        d.get("activation"),
        d.get("channel_multiple"),
    )
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    if not ch_mul:
        ch_mul = 8
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
            Conv,
            GhostConv,
            Bottleneck,
            GhostBottleneck,
            SPP,
            SPPF,
            DWConv,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
            C3TR,
            C3SPP,
            C3Ghost,
            nn.ConvTranspose2d,
            DWConvTranspose2d,
            C3x,
        }:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, ch_mul)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, ch_mul)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f"{i:>3}{f!s:>18}{n_:>3}{np:10.0f}  {t:<40}{args!s:<30}")  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
    parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
    parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    parser.add_argument("--profile", action="store_true", help="profile model speed")
    parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
    parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
    opt = parser.parse_args()
    opt.cfg = check_yaml(opt.cfg)  # check YAML
    print_args(vars(opt))
    device = select_device(opt.device)

    # Create model
    im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
    model = Model(opt.cfg).to(device)

    # Options
    if opt.line_profile:  # profile layer by layer
        model(im, profile=True)

    elif opt.profile:  # profile forward-backward
        results = profile(input=im, ops=[model], n=3)

    elif opt.test:  # test all models
        for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
            try:
                _ = Model(cfg)
            except Exception as e:
                print(f"Error in {cfg}: {e}")

    else:  # report fused model summary
        model.fuse()
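Reviewer note: as a quick sanity check of the file above, a minimal sketch that builds an untrained `DetectionModel` and runs one forward pass (assumes `models/yolov5s.yaml` is present and runs on CPU):

```python
# Minimal sketch: construct the model from its YAML and inspect the output shape.
import torch
from models.yolo import DetectionModel

model = DetectionModel(cfg="models/yolov5s.yaml", ch=3, nc=80).eval()
im = torch.zeros(1, 3, 640, 640)  # dummy BCHW input
with torch.no_grad():
    pred, _ = model(im)  # eval mode returns (concatenated predictions, raw per-level outputs)
print(pred.shape)  # torch.Size([1, 25200, 85]); 25200 = 3 anchors x (80^2 + 40^2 + 20^2) cells
```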
models/yolov5l.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/yolov5m.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/yolov5n.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.25 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/yolov5s.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
models/yolov5x.yaml
ADDED
@@ -0,0 +1,49 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple
anchors:
  - [10, 13, 16, 30, 33, 23] # P3/8
  - [30, 61, 62, 45, 59, 119] # P4/16
  - [116, 90, 156, 198, 373, 326] # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
  [
    [-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
    [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
    [-1, 3, C3, [128]],
    [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
    [-1, 6, C3, [256]],
    [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
    [-1, 9, C3, [512]],
    [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
    [-1, 3, C3, [1024]],
    [-1, 1, SPPF, [1024, 5]], # 9
  ]

# YOLOv5 v6.0 head
head: [
    [-1, 1, Conv, [512, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 6], 1, Concat, [1]], # cat backbone P4
    [-1, 3, C3, [512, False]], # 13

    [-1, 1, Conv, [256, 1, 1]],
    [-1, 1, nn.Upsample, [None, 2, "nearest"]],
    [[-1, 4], 1, Concat, [1]], # cat backbone P3
    [-1, 3, C3, [256, False]], # 17 (P3/8-small)

    [-1, 1, Conv, [256, 3, 2]],
    [[-1, 14], 1, Concat, [1]], # cat head P4
    [-1, 3, C3, [512, False]], # 20 (P4/16-medium)

    [-1, 1, Conv, [512, 3, 2]],
    [[-1, 10], 1, Concat, [1]], # cat head P5
    [-1, 3, C3, [1024, False]], # 23 (P5/32-large)

    [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
  ]
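Reviewer note: the five model YAMLs above (n/s/m/l/x) share one architecture and differ only in `depth_multiple` and `width_multiple`; `parse_model` in models/yolo.py scales channel counts via `make_divisible`. A small numeric sketch of that width scaling (`make_divisible` is re-implemented here so the snippet stands alone):

```python
# Minimal sketch of parse_model's width scaling: c2 = make_divisible(c2 * gw, ch_mul).
import math

def make_divisible(x, divisor):
    # round the channel count up to the nearest multiple of divisor (ch_mul defaults to 8)
    return math.ceil(x / divisor) * divisor

widths = {"yolov5n": 0.25, "yolov5s": 0.50, "yolov5m": 0.75, "yolov5l": 1.00, "yolov5x": 1.25}
for name, gw in widths.items():
    print(name, make_divisible(1024 * gw, 8))  # SPPF output channels: 256, 512, 768, 1024, 1280
```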
requirements.txt
ADDED
@@ -0,0 +1,51 @@
# YOLOv5 requirements
# Usage: pip install -r requirements.txt

# Base ------------------------------------------------------------------------
gitpython>=3.1.30
matplotlib>=3.3
numpy>=1.23.5
opencv-python>=4.1.1
pillow>=10.3.0
psutil # system resources
PyYAML>=5.3.1
requests>=2.32.2
scipy>=1.4.1
thop>=0.1.1 # FLOPs computation
torch>=1.8.0 # see https://pytorch.org/get-started/locally (recommended)
torchvision>=0.9.0
tqdm>=4.66.3
ultralytics>=8.2.64 # https://ultralytics.com
# protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012

# Logging ---------------------------------------------------------------------
# tensorboard>=2.4.1
# clearml>=1.2.0
# comet

# Plotting --------------------------------------------------------------------
pandas>=1.1.4
seaborn>=0.11.0

# Export ----------------------------------------------------------------------
# coremltools>=6.0 # CoreML export
# onnx>=1.10.0 # ONNX export
# onnx-simplifier>=0.4.1 # ONNX simplifier
# nvidia-pyindex # TensorRT export
# nvidia-tensorrt # TensorRT export
# scikit-learn<=1.1.2 # CoreML quantization
# tensorflow>=2.4.0,<=2.13.1 # TF exports (-cpu, -aarch64, -macos)
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev>=2023.0 # OpenVINO export

# Deploy ----------------------------------------------------------------------
packaging # Migration of deprecated pkg_resources packages
setuptools>=70.0.0 # Snyk vulnerability fix
# tritonclient[all]~=2.24.0

# Extras ----------------------------------------------------------------------
# ipython # interactive notebook
# mss # screenshots
# albumentations>=1.0.3
# pycocotools>=2.0.6 # COCO mAP
urllib3>=2.5.0 ; python_version > "3.8" # not directly required, pinned by Snyk to avoid a vulnerability
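Reviewer note: a minimal sketch of checking a few of these pinned minimums at runtime, in the spirit of the repo's `check_requirements` (the three packages chosen are illustrative; `packaging` is listed above under Deploy):

```python
# Minimal sketch: compare installed package versions against requirements.txt floors.
from importlib.metadata import PackageNotFoundError, version
from packaging.version import Version

minimums = {"numpy": "1.23.5", "torch": "1.8.0", "tqdm": "4.66.3"}
for pkg, floor in minimums.items():
    try:
        ok = Version(version(pkg)) >= Version(floor)
        print(f"{pkg} {version(pkg)} {'OK' if ok else f'needs >= {floor}'}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```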
utils/__init__.py
ADDED
@@ -0,0 +1,96 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""utils/initialization."""

import contextlib
import platform
import threading


def emojis(str=""):
    """Returns an emoji-safe version of a string, stripped of emojis on Windows platforms."""
    return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str


class TryExcept(contextlib.ContextDecorator):
    """A context manager and decorator for error handling that prints an optional message with emojis on exception."""

    def __init__(self, msg=""):
        """Initializes TryExcept with an optional message, used as a decorator or context manager for error handling."""
        self.msg = msg

    def __enter__(self):
        """Enter the runtime context related to this object for error handling with an optional message."""
        pass

    def __exit__(self, exc_type, value, traceback):
        """Context manager exit method that prints an error message with emojis if an exception occurred, always returns
        True.
        """
        if value:
            print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
        return True


def threaded(func):
    """Decorator @threaded to run a function in a separate thread, returning the thread instance."""

    def wrapper(*args, **kwargs):
        """Runs the decorated function in a separate daemon thread and returns the thread instance."""
        thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
        thread.start()
        return thread

    return wrapper


def join_threads(verbose=False):
    """Joins all daemon threads, optionally printing their names if verbose is True.

    Example: atexit.register(lambda: join_threads())
    """
    main_thread = threading.current_thread()
    for t in threading.enumerate():
        if t is not main_thread:
            if verbose:
                print(f"Joining thread {t.name}")
            t.join()


def notebook_init(verbose=True):
    """Initializes notebook environment by checking requirements, cleaning up, and displaying system info."""
    print("Checking setup...")

    import os
    import shutil

    from ultralytics.utils.checks import check_requirements

    from utils.general import check_font, is_colab
    from utils.torch_utils import select_device  # imports

    check_font()

    import psutil

    if check_requirements("wandb", install=False):
        os.system("pip uninstall -y wandb")  # eliminate unexpected account creation prompt with infinite hang
    if is_colab():
        shutil.rmtree("/content/sample_data", ignore_errors=True)  # remove colab /sample_data directory

    # System info
    display = None
    if verbose:
        gb = 1 << 30  # bytes to GiB (1024 ** 3)
        ram = psutil.virtual_memory().total
        total, _used, free = shutil.disk_usage("/")
        with contextlib.suppress(Exception):  # clear display if ipython is installed
            from IPython import display

            display.clear_output()
        s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
    else:
        s = ""

    select_device(newline=False)
    print(emojis(f"Setup complete ✅ {s}"))
    return display
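Reviewer note: a minimal usage sketch for the `TryExcept` and `threaded` helpers above (assumes the repo root is on `sys.path` so `utils` imports):

```python
# Minimal sketch: TryExcept swallows exceptions and prints them; threaded runs
# a function in a daemon thread and returns the Thread object immediately.
import time
from utils import TryExcept, threaded

@TryExcept("demo failed")
def may_fail(x):
    return 1 / x

@threaded
def background(msg, delay=0.1):
    time.sleep(delay)
    print(msg)

may_fail(0)             # prints "demo failed: division by zero" instead of raising
t = background("done")  # returns the started daemon thread
t.join()
```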
utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (4.88 kB)
utils/__pycache__/augmentations.cpython-312.pyc
ADDED
Binary file (28.2 kB)
utils/__pycache__/autoanchor.cpython-312.pyc
ADDED
Binary file (12.3 kB)
utils/__pycache__/dataloaders.cpython-312.pyc
ADDED
Binary file (86.7 kB)
utils/__pycache__/downloads.cpython-312.pyc
ADDED
Binary file (7.83 kB)
utils/__pycache__/general.cpython-312.pyc
ADDED
Binary file (74.2 kB)
utils/__pycache__/metrics.cpython-312.pyc
ADDED
Binary file (21.9 kB)