Commit ·
f4c3b61
1
Parent(s): e7454de
update model scripts
Browse files- .gitignore +2 -0
- README.md +138 -3
- config.json +24 -0
- configuration_yolo.py +30 -0
- modeling_yolo.py +250 -0
- requirements.txt +4 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
| 2 |
+
.idea/
|
README.md
CHANGED
|
@@ -1,6 +1,141 @@
|
|
| 1 |
---
|
| 2 |
-
license:
|
| 3 |
pipeline_tag: object-detection
|
|
|
|
| 4 |
tags:
|
| 5 |
-
-
|
| 6 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
license: agpl-3.0
|
| 3 |
pipeline_tag: object-detection
|
| 4 |
+
library_name: ultralytics
|
| 5 |
tags:
|
| 6 |
+
- yolo
|
| 7 |
+
- yolov8
|
| 8 |
+
- ultralytics
|
| 9 |
+
- object-detection
|
| 10 |
+
- computer-vision
|
| 11 |
+
- face-detection
|
| 12 |
+
- person-detection
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# YOLOv8x Face & Person Detector
|
| 16 |
+
|
| 17 |
+
<div align="center">
|
| 18 |
+
<a href="https://huggingface.co/spaces/iitolstykh/MiVOLO-Demo">
|
| 19 |
+
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg" alt="Open in Spaces">
|
| 20 |
+
<img src="images/image.png" width="500" alt="YOLO Detection Example"/>
|
| 21 |
+
</a>
|
| 22 |
+
</div>
|
| 23 |
+
|
| 24 |
+
## Model Description
|
| 25 |
+
|
| 26 |
+
This model is a fine-tuned version of **YOLOv8x** specialized in detecting two specific classes: **Face** and **Person**.
|
| 27 |
+
|
| 28 |
+
It has been trained on a large-scale proprietary dataset consisting of approximately 150,000 images.
|
| 29 |
+
The high capacity of the YOLOv8x architecture combined with a diverse proprietary dataset ensures high accuracy and robustness in various scenarios.
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
## How to Use
|
| 33 |
+
|
| 34 |
+
### Installation
|
| 35 |
+
```bash
|
| 36 |
+
pip install ultralytics==8.1.0 torch==2.5.1 transformers huggingface_hub
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### 1. Use with transformers
|
| 40 |
+
|
| 41 |
+
You can load the model using the Hugging Face transformers library by enabling custom code execution.
|
| 42 |
+
|
| 43 |
+
```python
|
| 44 |
+
from transformers import AutoModel
|
| 45 |
+
from PIL import Image
|
| 46 |
+
import torch
|
| 47 |
+
|
| 48 |
+
# 1. Load model with trust_remote_code=True
|
| 49 |
+
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 50 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 51 |
+
model = AutoModel.from_pretrained(
|
| 52 |
+
"iitolstykh/YOLO-Face-Person-Detector",
|
| 53 |
+
trust_remote_code=True,
|
| 54 |
+
dtype=torch_dtype,
|
| 55 |
+
).to(device)
|
| 56 |
+
|
| 57 |
+
# 2. Load image (You can use URL, PIL.Image or np.ndarray)
|
| 58 |
+
image = Image.open("path/to/your/image.jpg")
|
| 59 |
+
# image = cv2.imread("path/to/your/image.jpg")
|
| 60 |
+
|
| 61 |
+
# 3. Perform inference
|
| 62 |
+
results = model(image, conf=0.4, iou=0.7)[0]
|
| 63 |
+
|
| 64 |
+
# 4. Process results
|
| 65 |
+
print("Found objects:", [results.names[int(det.cls)] for det in results.boxes])
|
| 66 |
+
print("Boxes:", results.boxes)
|
| 67 |
+
# render_result(model=model.yolo, image=image, result=results).show()
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### 2. Use with ultralytics
|
| 71 |
+
|
| 72 |
+
If you prefer the standard Ultralytics API, you can download the weights from the Hub and load them directly.
|
| 73 |
+
|
| 74 |
+
```python
|
| 75 |
+
from ultralytics import YOLO
|
| 76 |
+
from huggingface_hub import hf_hub_download
|
| 77 |
+
import torch
|
| 78 |
+
|
| 79 |
+
# 1. Download model weights
|
| 80 |
+
model_path = hf_hub_download(
|
| 81 |
+
repo_id="iitolstykh/YOLO-Face-Person-Detector",
|
| 82 |
+
filename="yolov8x_person_face.pt",
|
| 83 |
+
repo_type="model"
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
# 2. Load model
|
| 87 |
+
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 88 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
| 89 |
+
model = YOLO(model_path)
|
| 90 |
+
model.fuse()
|
| 91 |
+
if torch_dtype is torch.float16:
|
| 92 |
+
model.model = model.model.half()
|
| 93 |
+
model.to(device)
|
| 94 |
+
|
| 95 |
+
# 3. Perform inference
|
| 96 |
+
image = 'https://variety.com/wp-content/uploads/2023/04/MCDNOHA_SP001.jpg'
|
| 97 |
+
results = model.predict(image, conf=0.4, iou=0.7, half=torch_dtype is torch.float16)
|
| 98 |
+
|
| 99 |
+
# 4. Show results
|
| 100 |
+
for result in results:
|
| 101 |
+
boxes = result.boxes
|
| 102 |
+
print("Found objects:", [result.names[int(c)] for c in boxes.cls])
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
### 3. Use with ultralyticsplus
|
| 106 |
+
|
| 107 |
+
This method automatically handles model downloading for Ultralytics YOLO models.
|
| 108 |
+
|
| 109 |
+
```bash
|
| 110 |
+
pip install ultralyticsplus==0.1.0
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
```python
|
| 114 |
+
from ultralyticsplus import YOLO, render_result
|
| 115 |
+
|
| 116 |
+
# 1. Load model
|
| 117 |
+
model = YOLO('iitolstykh/YOLO-Face-Person-Detector')
|
| 118 |
+
|
| 119 |
+
# 2. Set model parameters
|
| 120 |
+
model.overrides['conf'] = 0.4
|
| 121 |
+
model.overrides['iou'] = 0.7
|
| 122 |
+
model.overrides['max_det'] = 100
|
| 123 |
+
|
| 124 |
+
# 3. Set image (You can use URL, PIL.Image or np.ndarray)
|
| 125 |
+
image = 'https://variety.com/wp-content/uploads/2023/04/MCDNOHA_SP001.jpg'
|
| 126 |
+
|
| 127 |
+
# 4. Perform inference
|
| 128 |
+
results = model.predict(image)
|
| 129 |
+
|
| 130 |
+
# 5. Show results
|
| 131 |
+
print("Found objects:", [results[0].names[int(det.cls)] for det in results[0].boxes])
|
| 132 |
+
render = render_result(model=model, image=image, result=results[0])
|
| 133 |
+
render.show()
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
## License
|
| 137 |
+
|
| 138 |
+
This model is based on the Ultralytics YOLOv8 architecture and inherits the **AGPL-3.0 License**.
|
| 139 |
+
|
| 140 |
+
Please refer to the official [Ultralytics Licensing](https://huggingface.co/Ultralytics/YOLOv8#license) page for more information regarding commercial usage and restrictions.
|
| 141 |
+
|
config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"YOLOV8ForObjectDetection"
|
| 4 |
+
],
|
| 5 |
+
"auto_map": {
|
| 6 |
+
"AutoConfig": "configuration_yolo.YoloV8Config",
|
| 7 |
+
"AutoModel": "modeling_yolo.YOLOV8ForObjectDetection"
|
| 8 |
+
},
|
| 9 |
+
|
| 10 |
+
"names": {
|
| 11 |
+
"0": "person",
|
| 12 |
+
"1": "face"
|
| 13 |
+
},
|
| 14 |
+
|
| 15 |
+
"model_config": "yolov8x.yaml",
|
| 16 |
+
"num_classes": 2,
|
| 17 |
+
"task": "detect",
|
| 18 |
+
"input_size": 640,
|
| 19 |
+
|
| 20 |
+
"model_type": "yolov8",
|
| 21 |
+
"torch_dtype": "float16",
|
| 22 |
+
"transformers_version": "4.57.1",
|
| 23 |
+
"verbose": 0
|
| 24 |
+
}
|
configuration_yolo.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A HuggingFace-style model configuration."""
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
from transformers import PretrainedConfig
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class YoloV8Config(PretrainedConfig):
    """HuggingFace-style configuration for the YOLOv8 face/person detector.

    Holds the architecture and task parameters (YAML config name, class
    count/names, input size, detection strides) that the modeling code reads
    when building the network.
    """

    model_type = 'yolov8'

    def __init__(
        self,
        model_config: str = "yolov8x.yaml",
        task: str = 'detect',
        num_classes: int = 2,
        num_channels: int = 3,
        input_size: int = 640,
        names: Optional[Dict] = None,
        stride: Optional[List[int]] = None,
        verbose: bool = False,
        **kwargs: Any
    ):
        """Initializes the configuration.

        Args:
            model_config (str): Name of the ultralytics YAML architecture file. Defaults to "yolov8x.yaml".
            task (str): YOLO task type. Defaults to 'detect'.
            num_classes (int): Number of detection classes. Defaults to 2.
            num_channels (int): Number of input image channels. Defaults to 3.
            input_size (int): Square input resolution. Defaults to 640.
            names (Dict, optional): Mapping of class index (as str or int) to class
                name. Defaults to {"0": "person", "1": "face"} when None.
            stride (List[int], optional): Detection-layer strides. Defaults to
                [8, 16, 32] when None.
            verbose (bool): Whether the model prints details on construction. Defaults to False.
            **kwargs: Forwarded to :class:`~transformers.PretrainedConfig`.
        """
        # Avoid mutable default arguments: resolve the shared defaults per call.
        if names is None:
            names = {"0": "person", "1": "face"}
        if stride is None:
            stride = [8, 16, 32]

        self.input_size = input_size
        self.num_channels = num_channels
        self.task = task
        self.model_config = model_config
        self.num_classes = num_classes
        self.stride = stride
        self.verbose = bool(verbose)
        # JSON serialization stringifies keys; normalize back to int indices.
        self.names = {int(key): value for key, value in names.items()}

        super().__init__(**kwargs)
|
modeling_yolo.py
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Union, Optional, Tuple, List
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import torch.nn as nn
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
from ultralytics import YOLO
|
| 10 |
+
from ultralytics.nn.tasks import (
|
| 11 |
+
DetectionModel,
|
| 12 |
+
BaseModel,
|
| 13 |
+
yaml_model_load,
|
| 14 |
+
LOGGER,
|
| 15 |
+
parse_model,
|
| 16 |
+
deepcopy,
|
| 17 |
+
Detect,
|
| 18 |
+
Segment,
|
| 19 |
+
Pose,
|
| 20 |
+
OBB,
|
| 21 |
+
initialize_weights,
|
| 22 |
+
)
|
| 23 |
+
from ultralytics.engine import predictor
|
| 24 |
+
from ultralytics.engine.results import Results
|
| 25 |
+
from configuration_yolo import YoloV8Config
|
| 26 |
+
from transformers import PreTrainedModel
|
| 27 |
+
|
| 28 |
+
# NOTE(review): os.unsetenv() changes the process environment but does NOT
# update os.environ; presumably intended to drop an inherited deterministic
# cuBLAS workspace setting before torch/CUDA initialization — confirm intent.
os.unsetenv("CUBLAS_WORKSPACE_CONFIG")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class YOLOV8DetectionModel(BaseModel):
    """YOLOv8 detection network built from a YAML architecture file.

    A trimmed variant of ultralytics' ``DetectionModel``: the detection-layer
    strides are supplied explicitly instead of being measured with a dummy
    forward pass, so the model can be constructed without running inference.
    """

    # Borrow augmentation/loss plumbing unchanged from the stock DetectionModel.
    _predict_augment = DetectionModel._predict_augment
    _descale_pred = DetectionModel._descale_pred
    _clip_augmented = DetectionModel._clip_augmented
    init_criterion = DetectionModel.init_criterion

    # model, input channels, number of classes
    def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True, stride: Optional[List[int]] = None):
        """
        Initializes the YOLOv8 detection model with the given configuration and parameters.

        This constructor parses the model configuration (YAML), sets up the input channels and number of classes,
        builds the model architecture, and initializes the strides and weights.

        Args:
            cfg (str | dict): Path to the YAML configuration file or the configuration dictionary itself. Defaults to "yolov8n.yaml".
            ch (int): Number of input channels. Defaults to 3.
            nc (int, optional): Number of classes. If provided, overrides the value in the YAML config. Defaults to None.
            verbose (bool): Whether to print model details during initialization. Defaults to True.
            stride (List[int], optional): A list of stride values for the detection layer.
                Defaults to [8, 16, 32] when None (avoids a mutable default argument).
        """
        super().__init__()

        # Resolve the stride default per call instead of sharing a mutable list.
        if stride is None:
            stride = [8, 16, 32]

        self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict

        # Define model
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml["nc"] = nc  # override YAML value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
        self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
        self.inplace = self.yaml.get("inplace", True)

        # Build strides from the explicit argument (no dummy forward pass).
        m = self.model[-1]  # Detect()
        if isinstance(m, (Detect, Segment, Pose, OBB)):
            m.inplace = self.inplace
            m.stride = torch.tensor(stride, dtype=torch.float32)  # forward
            self.stride = m.stride
            m.bias_init()  # only run once
        else:
            self.stride = torch.Tensor([32])  # default stride for i.e. RTDETR

        # Init weights, biases
        initialize_weights(self)
        if verbose:
            self.info()
            LOGGER.info("")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class YOLOWrapper(YOLO):
    # Thin adapter: lets an already-constructed torch module ride on the
    # ultralytics YOLO predict machinery instead of being loaded from a file.

    def __init__(self, model: torch.nn.Module, task=None) -> None:
        """
        Initializes the YOLO wrapper around a specific PyTorch model.

        This allows a standard PyTorch module to be used within the Ultralytics YOLO ecosystem
        by overriding the default initialization to accept an existing model object.

        Args:
            model (torch.nn.Module): The PyTorch model instance to wrap.
            task (str, optional): The specific task type for the YOLO model (e.g., 'detect'). Defaults to None.
        """
        # NOTE(review): model="" relies on YOLO.__init__ tolerating an empty
        # weight path; the real module is swapped in immediately after — verify
        # against the pinned ultralytics version (8.1.0).
        super().__init__(model="", task=task)
        self.model = model
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class YOLOV8PreTrainedModel(PreTrainedModel):
    """Base class wiring :class:`YoloV8Config` into transformers' PreTrainedModel machinery."""

    config_class = YoloV8Config
    base_model_prefix = 'model'
    _no_split_modules = ['model']

    def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
        """Initializes the weights of the model layers.

        Mirrors ultralytics' weight initialization: BatchNorm eps/momentum are
        adjusted and activations are made in-place; convolutions keep their
        default initialization.
        """
        # BUG FIX: the original compared the module *instance* against classes
        # with `is` / `in` (always False), so these branches never executed.
        if isinstance(module, nn.Conv2d):
            pass  # nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(module, nn.BatchNorm2d):
            module.eps = 1e-3
            module.momentum = 0.03
        elif isinstance(module, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)):
            module.inplace = True
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class YOLOV8ForObjectDetection(YOLOV8PreTrainedModel):
    """YOLOv8 face/person detector exposed through the transformers AutoModel API.

    Wraps a ``YOLOV8DetectionModel`` so the checkpoint can be loaded with
    ``AutoModel.from_pretrained(..., trust_remote_code=True)`` while inference
    is delegated to the ultralytics predictor.
    """

    def __init__(self, config: YoloV8Config):
        """
        Initializes the YOLOv8 object detection model based on the provided configuration.

        Args:
            config (YoloV8Config): The configuration object containing model parameters, channels, classes, and strides.
        """
        super().__init__(config)
        self.config = config

        # initialize a model
        self.model = YOLOV8DetectionModel(
            cfg=self.config.model_config,
            ch=self.config.num_channels,
            nc=self.config.num_classes,
            verbose=self.config.verbose,
            stride=self.config.stride,
        )
        self.model.names = self.config.names
        # Populated by from_pretrained(); forward() refuses to run before that.
        self.yolo: Optional[YOLOWrapper] = None
        # NOTE(review): this shadows nn.Module.half() with a bool on instances;
        # kept for compatibility with existing callers.
        self.half = False

        # Initialize weights and apply final processing
        self.post_init()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, *model_args, **kwargs):  # type: ignore
        """Loads a pretrained YOLOv8 model from a local path or the Hugging Face Hub and initializes the wrapper.

        This class method loads the model weights, creates the `YOLOWrapper` instance, and configures
        task-specific overrides to enable inference immediately after loading.

        Args:
            pretrained_model_name_or_path (str): The name or path of the pretrained model.
            model_args: Additional positional arguments passed to parent class.
            kwargs: Additional keyword arguments passed to parent class.

        Returns:
            YOLOV8ForObjectDetection: The initialized model with loaded weights and active YOLO wrapper.
        """

        # Remember the caller-requested dtype; weights are always loaded in
        # fp32 first because Conv+BN fusion requires float weights.
        dtype = torch.float32
        if "dtype" in kwargs:
            dtype = kwargs.pop("dtype")
        elif "torch_dtype" in kwargs:
            dtype = kwargs.pop("torch_dtype")

        fuse = True
        inplace = True

        # set model weights
        model = super().from_pretrained(
            pretrained_model_name_or_path,
            *model_args,
            **kwargs,
            dtype=torch.float32,  # needed for model.fuse()
        )

        # fuse model (inference only: freeze all parameters first)
        for module in model.model.modules():
            module.requires_grad_(False)
        model.model = model.model.fuse().eval() if fuse and hasattr(model.model, "fuse") else model.model.eval()

        # module updates
        for m in model.model.modules():
            t = type(m)
            if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
                m.inplace = inplace
            elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
                m.recompute_scale_factor = None  # torch 1.11.0 compatibility

        model.model.fp16 = dtype is torch.float16
        model.half = dtype is torch.float16

        # initialize a wrapper around the fused network for ultralytics predict()
        yolo = YOLOWrapper(model=model.model, task=model.config.task)
        yolo.overrides["model"] = pretrained_model_name_or_path
        yolo.overrides["task"] = model.config.task
        yolo.overrides["half"] = dtype is torch.float16
        model.yolo = yolo
        model.yolo.ckpt = pretrained_model_name_or_path
        if dtype is torch.float16:
            model.yolo.model = model.yolo.model.half()

        return model

    def forward(
        self,
        model_input: Optional[Union[Image.Image, np.ndarray, str]] = None,
        return_dict: Optional[bool] = None,
        conf: float = 0.4,
        iou: float = 0.7,
        max_det: int = 300,
        verbose: bool = False,
        **inference_kwargs,
    ) -> Union[Tuple, List[Results]]:
        """
        Performs a forward pass (inference) on the input data using the wrapped YOLO model.

        This method handles image preprocessing, inference, and post-processing (NMS) based on the provided arguments.
        It requires `from_pretrained` to have been called effectively to populate the internal YOLO wrapper.

        Args:
            model_input (Image.Image | np.ndarray | str, optional): The input image(s). Accepts file paths, PIL Images, or NumPy arrays.
            return_dict (bool, optional): Whether to return the results list instead of a tuple. Defaults to model config.
            conf (float): Confidence threshold for Non-Maximum Suppression (NMS). Defaults to 0.4.
            iou (float): IoU threshold for NMS. Defaults to 0.7.
            max_det (int): Maximum number of detections allowed per image. Defaults to 300.
            verbose (bool): Whether to print verbose output during inference. Defaults to False.
            **inference_kwargs: Additional arguments supported by the Ultralytics predictor (e.g., `imgsz`, `device`).
                See all available arguments at https://docs.ultralytics.com/usage/cfg.
        Returns:
            Union[Tuple, List[Results]]: A one-element tuple wrapping the list of `Results` if
            `return_dict` is False, otherwise the list of `Results` directly.

        Raises:
            RuntimeError: If the internal YOLO wrapper is not initialized (e.g., model not loaded via `.from_pretrained()`).
            TypeError: If `model_input` is not a PIL image, NumPy array, or string.
        """

        if self.yolo is None:
            raise RuntimeError("Call .from_pretrained(...) before forward().")

        # accepted image url, PIL.Image or np.ndarray image
        # (explicit check instead of `assert`, which is stripped under -O)
        if not isinstance(model_input, (Image.Image, np.ndarray, str)):
            raise TypeError(
                f"model_input must be a PIL.Image, np.ndarray or str, got {type(model_input)}"
            )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        detector_kwargs = {"conf": conf, "iou": iou, "verbose": verbose, "max_det": max_det}
        detector_kwargs.update(inference_kwargs)
        # ultralytics predict() returns a list of Results (one per image).
        results: List[Results] = self.yolo.predict(model_input, **detector_kwargs)

        if not return_dict:
            return (results,)

        return results
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ultralytics==8.1.0
|
| 2 |
+
torch==2.5.1
|
| 3 |
+
torchvision==0.20.1
|
| 4 |
+
omegaconf
|