iitolstykh committed
Commit f4c3b61 · 1 Parent(s): e7454de

update model scripts

Files changed (6)
  1. .gitignore +2 -0
  2. README.md +138 -3
  3. config.json +24 -0
  4. configuration_yolo.py +30 -0
  5. modeling_yolo.py +250 -0
  6. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .DS_Store
+ .idea/
README.md CHANGED
@@ -1,6 +1,141 @@
  ---
- license: apache-2.0
+ license: agpl-3.0
  pipeline_tag: object-detection
+ library_name: ultralytics
  tags:
- - YOLO
- ---
+ - yolo
+ - yolov8
+ - ultralytics
+ - object-detection
+ - computer-vision
+ - face-detection
+ - person-detection
+ ---
+
+ # YOLOv8x Face & Person Detector
+
+ <div align="center">
+ <a href="https://huggingface.co/spaces/iitolstykh/MiVOLO-Demo">
+ <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/open-in-hf-spaces-sm.svg" alt="Open in Spaces">
+ <img src="images/image.png" width="500" alt="YOLO Detection Example"/>
+ </a>
+ </div>
+
+ ## Model Description
+
+ This model is a fine-tuned version of **YOLOv8x** specialized in detecting two classes: **Face** and **Person**.
+
+ It was trained on a large-scale proprietary dataset of approximately 150,000 images.
+ The high capacity of the YOLOv8x architecture, combined with the diversity of the training data, makes the model accurate and robust across a wide range of scenarios.
+
+ ## How to Use
+
+ ### Installation
+ ```bash
+ pip install ultralytics==8.1.0 torch==2.5.1 transformers huggingface_hub
+ ```
+
+ ### 1. Use with transformers
+
+ You can load the model with the Hugging Face transformers library by enabling custom code execution.
+
+ ```python
+ from transformers import AutoModel
+ from PIL import Image
+ import torch
+
+ # 1. Load model with trust_remote_code=True
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ model = AutoModel.from_pretrained(
+     "iitolstykh/YOLO-Face-Person-Detector",
+     trust_remote_code=True,
+     dtype=torch_dtype,
+ ).to(device)
+
+ # 2. Load image (URL, PIL.Image or np.ndarray are accepted)
+ image = Image.open("path/to/your/image.jpg")
+ # image = cv2.imread("path/to/your/image.jpg")
+
+ # 3. Perform inference
+ results = model(image, conf=0.4, iou=0.7)[0]
+
+ # 4. Process results
+ print("Found objects:", [results.names[int(det.cls)] for det in results.boxes])
+ print("Boxes:", results.boxes)
+ # render_result(model=model.yolo, image=image, result=results).show()
+ ```
+
+ ### 2. Use with ultralytics
+
+ If you prefer the standard Ultralytics API, you can download the weights from the Hub and load them directly.
+
+ ```python
+ from ultralytics import YOLO
+ from huggingface_hub import hf_hub_download
+ import torch
+
+ # 1. Download model weights
+ model_path = hf_hub_download(
+     repo_id="iitolstykh/YOLO-Face-Person-Detector",
+     filename="yolov8x_person_face.pt",
+     repo_type="model"
+ )
+
+ # 2. Load model
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ model = YOLO(model_path)
+ model.fuse()
+ if torch_dtype is torch.float16:
+     model.model = model.model.half()
+ model.to(device)
+
+ # 3. Perform inference
+ image = 'https://variety.com/wp-content/uploads/2023/04/MCDNOHA_SP001.jpg'
+ results = model.predict(image, conf=0.4, iou=0.7, half=torch_dtype is torch.float16)
+
+ # 4. Show results
+ for result in results:
+     boxes = result.boxes
+     print("Found objects:", [result.names[int(c)] for c in boxes.cls])
+ ```
+
+ ### 3. Use with ultralyticsplus
+
+ This method handles model downloading automatically for Ultralytics YOLO models.
+
+ ```bash
+ pip install ultralyticsplus==0.1.0
+ ```
+
+ ```python
+ from ultralyticsplus import YOLO, render_result
+
+ # 1. Load model
+ model = YOLO('iitolstykh/YOLO-Face-Person-Detector')
+
+ # 2. Set model parameters
+ model.overrides['conf'] = 0.4
+ model.overrides['iou'] = 0.7
+ model.overrides['max_det'] = 100
+
+ # 3. Set image (URL, PIL.Image or np.ndarray are accepted)
+ image = 'https://variety.com/wp-content/uploads/2023/04/MCDNOHA_SP001.jpg'
+
+ # 4. Perform inference
+ results = model.predict(image)
+
+ # 5. Show results
+ print("Found objects:", [results[0].names[int(det.cls)] for det in results[0].boxes])
+ render = render_result(model=model, image=image, result=results[0])
+ render.show()
+ ```
+
+ ## License
+
+ This model is based on the Ultralytics YOLOv8 architecture and inherits the **AGPL-3.0 License**.
+
+ Please refer to the official [Ultralytics Licensing](https://huggingface.co/Ultralytics/YOLOv8#license) page for more information regarding commercial usage and restrictions.
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+     "architectures": [
+         "YOLOV8ForObjectDetection"
+     ],
+     "auto_map": {
+         "AutoConfig": "configuration_yolo.YoloV8Config",
+         "AutoModel": "modeling_yolo.YOLOV8ForObjectDetection"
+     },
+
+     "names": {
+         "0": "person",
+         "1": "face"
+     },
+
+     "model_config": "yolov8x.yaml",
+     "num_classes": 2,
+     "task": "detect",
+     "input_size": 640,
+
+     "model_type": "yolov8",
+     "torch_dtype": "float16",
+     "transformers_version": "4.57.1",
+     "verbose": 0
+ }
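The `auto_map` entries above are what let `AutoModel.from_pretrained(..., trust_remote_code=True)` find the custom classes shipped in this repo: each value has the form `<module_file>.<ClassName>`, where the module file sits next to `config.json`. A minimal sketch of that resolution step (the inlined JSON is an abridged copy of the config, and `resolve_auto_class` is a hypothetical helper for illustration, not the transformers API):

```python
import json

# Abridged copy of the config.json above, inlined for illustration.
CONFIG_TEXT = """
{
    "auto_map": {
        "AutoConfig": "configuration_yolo.YoloV8Config",
        "AutoModel": "modeling_yolo.YOLOV8ForObjectDetection"
    }
}
"""

def resolve_auto_class(config: dict, auto_class: str) -> tuple:
    """Hypothetical helper: split an auto_map value into (module file, class name)."""
    module_file, class_name = config["auto_map"][auto_class].rsplit(".", 1)
    return module_file, class_name

config = json.loads(CONFIG_TEXT)
print(resolve_auto_class(config, "AutoModel"))
# -> ('modeling_yolo', 'YOLOV8ForObjectDetection')
```

transformers then imports `modeling_yolo.py` from the downloaded repo and instantiates the named class, which is why `trust_remote_code=True` is required.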
configuration_yolo.py ADDED
@@ -0,0 +1,30 @@
+ """A HuggingFace-style model configuration."""
+ from typing import Any, Dict, List
+ from transformers import PretrainedConfig
+
+
+ class YoloV8Config(PretrainedConfig):
+     model_type = 'yolov8'
+
+     def __init__(
+         self,
+         model_config: str = "yolov8x.yaml",
+         task: str = 'detect',
+         num_classes: int = 2,
+         num_channels: int = 3,
+         input_size: int = 640,
+         names: Dict = {"0": "person", "1": "face"},
+         stride: List[int] = [8, 16, 32],
+         verbose: bool = False,
+         **kwargs: Any
+     ):
+         self.input_size = input_size
+         self.num_channels = num_channels
+         self.task = task
+         self.model_config = model_config
+         self.num_classes = num_classes
+         self.stride = stride
+         self.verbose = bool(verbose)
+         self.names = {int(key): value for key, value in names.items()}
+
+         super().__init__(**kwargs)
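Because the config round-trips through JSON, the class-id keys of `names` arrive as strings and `verbose` may arrive as an int flag (`"verbose": 0` in `config.json`); the `__init__` above normalizes both. A standalone sketch of that normalization, with no transformers dependency (`normalize_names` is an illustrative helper, not part of the repo):

```python
def normalize_names(names: dict) -> dict:
    """Coerce JSON string keys back to int class ids, as YoloV8Config.__init__ does."""
    return {int(key): value for key, value in names.items()}

# JSON objects always carry string keys...
loaded = {"0": "person", "1": "face"}
print(normalize_names(loaded))  # {0: 'person', 1: 'face'}

# ...and the int flag is coerced via bool(), as in self.verbose = bool(verbose)
print(bool(0))  # False
```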
modeling_yolo.py ADDED
@@ -0,0 +1,250 @@
+ import os
+ from typing import Union, Optional, Tuple, List
+
+ import numpy as np
+ from PIL import Image
+ import torch.nn as nn
+ import torch
+
+ from ultralytics import YOLO
+ from ultralytics.nn.tasks import (
+     DetectionModel,
+     BaseModel,
+     yaml_model_load,
+     LOGGER,
+     parse_model,
+     deepcopy,
+     Detect,
+     Segment,
+     Pose,
+     OBB,
+     initialize_weights,
+ )
+ from ultralytics.engine import predictor
+ from ultralytics.engine.results import Results
+ from configuration_yolo import YoloV8Config
+ from transformers import PreTrainedModel
+
+ os.unsetenv("CUBLAS_WORKSPACE_CONFIG")
+
+
+ class YOLOV8DetectionModel(BaseModel):
+     _predict_augment = DetectionModel._predict_augment
+     _descale_pred = DetectionModel._descale_pred
+     _clip_augmented = DetectionModel._clip_augmented
+     init_criterion = DetectionModel.init_criterion
+
+     # model, input channels, number of classes
+     def __init__(self, cfg="yolov8n.yaml", ch=3, nc=None, verbose=True, stride: List[int] = [8, 16, 32]):
+         """
+         Initializes the YOLOv8 detection model with the given configuration and parameters.
+
+         This constructor parses the model configuration (YAML), sets up the input channels and number of classes,
+         builds the model architecture, and initializes the strides and weights.
+
+         Args:
+             cfg (str | dict): Path to the YAML configuration file or the configuration dictionary itself. Defaults to "yolov8n.yaml".
+             ch (int): Number of input channels. Defaults to 3.
+             nc (int, optional): Number of classes. If provided, overrides the value in the YAML config. Defaults to None.
+             verbose (bool): Whether to print model details during initialization. Defaults to True.
+             stride (List[int]): A list of stride values for the detection layer. Defaults to [8, 16, 32].
+         """
+         super().__init__()
+
+         self.yaml = cfg if isinstance(cfg, dict) else yaml_model_load(cfg)  # cfg dict
+
+         # Define model
+         ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
+         if nc and nc != self.yaml["nc"]:
+             LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
+             self.yaml["nc"] = nc  # override YAML value
+         self.model, self.save = parse_model(deepcopy(self.yaml), ch=ch, verbose=verbose)  # model, savelist
+         self.names = {i: f"{i}" for i in range(self.yaml["nc"])}  # default names dict
+         self.inplace = self.yaml.get("inplace", True)
+
+         # Build strides
+         m = self.model[-1]  # Detect()
+         if isinstance(m, (Detect, Segment, Pose, OBB)):
+             m.inplace = self.inplace
+             m.stride = torch.tensor(stride, dtype=torch.float32)  # forward
+             self.stride = m.stride
+             m.bias_init()  # only run once
+         else:
+             self.stride = torch.Tensor([32])  # default stride for i.e. RTDETR
+
+         # Init weights, biases
+         initialize_weights(self)
+         if verbose:
+             self.info()
+             LOGGER.info("")
+
+
+ class YOLOWrapper(YOLO):
+
+     def __init__(self, model: torch.nn.Module, task=None) -> None:
+         """
+         Initializes the YOLO wrapper around a specific PyTorch model.
+
+         This allows a standard PyTorch module to be used within the Ultralytics YOLO ecosystem
+         by overriding the default initialization to accept an existing model object.
+
+         Args:
+             model (torch.nn.Module): The PyTorch model instance to wrap.
+             task (str, optional): The specific task type for the YOLO model (e.g., 'detect'). Defaults to None.
+         """
+         super().__init__(model="", task=task)
+         self.model = model
+
+
+ class YOLOV8PreTrainedModel(PreTrainedModel):
+     config_class = YoloV8Config
+     base_model_prefix = 'model'
+     _no_split_modules = ['model']
+
+     def _init_weights(self, module: Union[nn.Linear, nn.Conv2d, nn.LayerNorm]) -> None:
+         """Initializes the weights of the model layers."""
+
+         if isinstance(module, nn.Conv2d):
+             pass  # nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
+         elif isinstance(module, nn.BatchNorm2d):
+             module.eps = 1e-3
+             module.momentum = 0.03
+         elif isinstance(module, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU)):
+             module.inplace = True
+
+
+ class YOLOV8ForObjectDetection(YOLOV8PreTrainedModel):
+
+     def __init__(self, config: YoloV8Config):
+         """
+         Initializes the YOLOv8 object detection model based on the provided configuration.
+
+         Args:
+             config (YoloV8Config): The configuration object containing model parameters, channels, classes, and strides.
+         """
+         super().__init__(config)
+         self.config = config
+
+         # initialize a model
+         self.model = YOLOV8DetectionModel(
+             cfg=self.config.model_config,
+             ch=self.config.num_channels,
+             nc=self.config.num_classes,
+             verbose=self.config.verbose,
+             stride=self.config.stride,
+         )
+         self.model.names = self.config.names
+         self.yolo: YOLOWrapper = None
+         self.half = False
+
+         # Initialize weights and apply final processing
+         self.post_init()
+
+     @classmethod
+     def from_pretrained(cls, pretrained_model_name_or_path: str, *model_args, **kwargs):  # type: ignore
+         """Loads a pretrained YOLOv8 model from a local path or the Hugging Face Hub and initializes the wrapper.
+
+         This class method loads the model weights, creates the `YOLOWrapper` instance, and configures
+         task-specific overrides to enable inference immediately after loading.
+
+         Args:
+             pretrained_model_name_or_path (str): The name or path of the pretrained model.
+             model_args: Additional positional arguments passed to the parent class.
+             kwargs: Additional keyword arguments passed to the parent class.
+
+         Returns:
+             YOLOV8ForObjectDetection: The initialized model with loaded weights and active YOLO wrapper.
+         """
+
+         dtype = torch.float32
+         if "dtype" in kwargs:
+             dtype = kwargs.pop("dtype")
+         elif "torch_dtype" in kwargs:
+             dtype = kwargs.pop("torch_dtype")
+
+         fuse = True
+         inplace = True
+
+         # set model weights
+         model = super().from_pretrained(
+             pretrained_model_name_or_path,
+             *model_args,
+             **kwargs,
+             dtype=torch.float32,  # needed for model.fuse()
+         )
+
+         # fuse model
+         for module in model.model.modules():
+             module.requires_grad_(False)
+         model.model = model.model.fuse().eval() if fuse and hasattr(model.model, "fuse") else model.model.eval()
+
+         # module updates
+         for m in model.model.modules():
+             t = type(m)
+             if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Segment, Pose, OBB):
+                 m.inplace = inplace
+             elif t is nn.Upsample and not hasattr(m, "recompute_scale_factor"):
+                 m.recompute_scale_factor = None  # torch 1.11.0 compatibility
+
+         model.model.fp16 = dtype is torch.float16
+         model.half = dtype is torch.float16
+
+         # initialize a wrapper
+         yolo = YOLOWrapper(model=model.model, task=model.config.task)
+         yolo.overrides["model"] = pretrained_model_name_or_path
+         yolo.overrides["task"] = model.config.task
+         yolo.overrides["half"] = dtype is torch.float16
+         model.yolo = yolo
+         model.yolo.ckpt = pretrained_model_name_or_path
+         if dtype is torch.float16:
+             model.yolo.model = model.yolo.model.half()
+
+         return model
+
+     def forward(
+         self,
+         model_input: Optional[Union[Image.Image, np.ndarray, str]] = None,
+         return_dict: Optional[bool] = None,
+         conf: float = 0.4,
+         iou: float = 0.7,
+         max_det: int = 300,
+         verbose: bool = False,
+         **inference_kwargs,
+     ) -> Union[Tuple, Results]:
+         """
+         Performs a forward pass (inference) on the input data using the wrapped YOLO model.
+
+         This method handles image preprocessing, inference, and post-processing (NMS) based on the provided arguments.
+         It requires `from_pretrained` to have been called first so that the internal YOLO wrapper is populated.
+
+         Args:
+             model_input (Image.Image | np.ndarray | str, optional): The input image(s). Accepts file paths, PIL Images, or NumPy arrays.
+             return_dict (bool, optional): Whether to return the `Results` object directly instead of a tuple. Defaults to the model config.
+             conf (float): Confidence threshold for Non-Maximum Suppression (NMS). Defaults to 0.4.
+             iou (float): IoU threshold for NMS. Defaults to 0.7.
+             max_det (int): Maximum number of detections allowed per image. Defaults to 300.
+             verbose (bool): Whether to print verbose output during inference. Defaults to False.
+             **inference_kwargs: Additional arguments supported by the Ultralytics predictor (e.g., `imgsz`, `device`).
+                 See all available arguments at https://docs.ultralytics.com/usage/cfg.
+
+         Returns:
+             Union[Tuple, Results]: A tuple containing the `Results` object if `return_dict` is False, otherwise the `Results` object directly.
+
+         Raises:
+             RuntimeError: If the internal YOLO wrapper is not initialized (i.e., the model was not loaded via `.from_pretrained()`).
+         """
+
+         if self.yolo is None:
+             raise RuntimeError("Call .from_pretrained(...) before forward().")
+
+         # accepted inputs: image URL/path, PIL.Image or np.ndarray
+         assert isinstance(model_input, (Image.Image, np.ndarray, str))
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         detector_kwargs = {"conf": conf, "iou": iou, "verbose": verbose, "max_det": max_det}
+         detector_kwargs.update(inference_kwargs)
+         results: Results = self.yolo.predict(model_input, **detector_kwargs)
+
+         if not return_dict:
+             return (results,)
+
+         return results
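Two small conventions in the file above are easy to miss: `from_pretrained` gives an explicit `dtype` kwarg precedence over `torch_dtype` (falling back to float32), and `forward` wraps the results in a 1-tuple unless `return_dict` is set. A torch-free sketch of both (the function names are illustrative helpers, and dtypes are modeled as plain strings here):

```python
def pick_dtype(kwargs: dict, default: str = "float32") -> str:
    """Mirrors the kwarg precedence in from_pretrained: dtype > torch_dtype > default."""
    if "dtype" in kwargs:
        return kwargs.pop("dtype")
    if "torch_dtype" in kwargs:
        return kwargs.pop("torch_dtype")
    return default

def wrap_results(results, return_dict: bool):
    """Mirrors forward(): the bare Results object when return_dict, else a 1-tuple."""
    return results if return_dict else (results,)

print(pick_dtype({"dtype": "float16", "torch_dtype": "bfloat16"}))  # float16
print(pick_dtype({}))                                               # float32
print(wrap_results("detections", return_dict=False))                # ('detections',)
```

The 1-tuple fallback matches the transformers convention that models return tuples when `return_dict=False`, so both calling styles shown in the README remain valid.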
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ ultralytics==8.1.0
+ torch==2.5.1
+ torchvision==0.20.1
+ omegaconf