| from typing import Dict, List, Optional, Union |
|
|
| from transformers.configuration_utils import PretrainedConfig |
| from transformers.utils import logging |
|
|
|
|
# Module-level logger, named after this module, obtained through the
# `transformers.utils.logging` helper imported above.
logger = logging.get_logger(__name__)
|
|
|
|
class RTMDetConfig(PretrainedConfig):
    """
    Configuration class for RTMDet models from OpenMMLab.

    The constructor forwards ``**kwargs`` to the parent class and then stores
    every named argument as an attribute of the same name. List-valued
    arguments are copied on assignment, so the configuration never shares a
    list object with the caller or with another configuration instance.

    Args:
        backbone_arch (`str`, *optional*, defaults to `"P5"`):
            Architecture of the backbone. Can be either "P5" or "P6".
        backbone_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio of the backbone channels.
        backbone_deepen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to deepen the backbone stages.
        backbone_widen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to widen the backbone channels.
        backbone_channel_attention (`bool`, *optional*, defaults to `True`):
            Whether to use channel attention in the backbone.
        neck_in_channels (`List[int]`, *optional*, defaults to `[256, 512, 1024]`):
            Input channels for the neck. `None` selects the default.
        neck_out_channels (`int`, *optional*, defaults to `256`):
            Output channels for the neck.
        neck_num_csp_blocks (`int`, *optional*, defaults to `3`):
            Number of CSP blocks in the neck.
        neck_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio for the neck channels.
        num_classes (`int`, *optional*, defaults to `80`):
            Number of classes to predict.
        head_in_channels (`int`, *optional*, defaults to `256`):
            Input channels for the detection head.
        head_stacked_convs (`int`, *optional*, defaults to `2`):
            Number of stacked convolutions in the head.
        head_feat_channels (`int`, *optional*, defaults to `256`):
            Number of feature channels in the head.
        head_with_objectness (`bool`, *optional*, defaults to `False`):
            Whether to use objectness in the head.
        head_exp_on_reg (`bool`, *optional*, defaults to `True`):
            Whether to use exponential function on the regression branch.
        head_share_conv (`bool`, *optional*, defaults to `True`):
            Whether to share convolutions between classes in the head.
        head_pred_kernel_size (`int`, *optional*, defaults to `1`):
            Kernel size for the prediction layer in the head.
        strides (`List[int]`, *optional*, defaults to `[8, 16, 32]`):
            Strides for multi-scale feature maps. `None` selects the default.
        input_size (`List[int]`, *optional*, defaults to `[640, 640]`):
            Default input image size [width, height]. `None` selects the
            default.
        score_threshold (`float`, *optional*, defaults to `0.05`):
            Score threshold for detections.
        nms_threshold (`float`, *optional*, defaults to `0.6`):
            NMS IoU threshold.
        max_detections (`int`, *optional*, defaults to `100`):
            Maximum number of detections to return.
        **kwargs:
            Additional parameters passed to the parent class.
    """

    model_type = "rtmdet"

    def __init__(
        self,
        backbone_arch: str = "P5",
        backbone_expand_ratio: float = 0.5,
        backbone_deepen_factor: float = 1.0,
        backbone_widen_factor: float = 1.0,
        backbone_channel_attention: bool = True,
        neck_in_channels: Optional[List[int]] = None,
        neck_out_channels: int = 256,
        neck_num_csp_blocks: int = 3,
        neck_expand_ratio: float = 0.5,
        num_classes: int = 80,
        head_in_channels: int = 256,
        head_stacked_convs: int = 2,
        head_feat_channels: int = 256,
        head_with_objectness: bool = False,
        head_exp_on_reg: bool = True,
        head_share_conv: bool = True,
        head_pred_kernel_size: int = 1,
        strides: Optional[List[int]] = None,
        input_size: Optional[List[int]] = None,
        score_threshold: float = 0.05,
        nms_threshold: float = 0.6,
        max_detections: int = 100,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Backbone parameters.
        self.backbone_arch = backbone_arch
        self.backbone_expand_ratio = backbone_expand_ratio
        self.backbone_deepen_factor = backbone_deepen_factor
        self.backbone_widen_factor = backbone_widen_factor
        self.backbone_channel_attention = backbone_channel_attention

        # Neck parameters.
        # List defaults are built per call (None sentinel) and caller-supplied
        # sequences are copied: a list literal in the signature would be a
        # single object shared by every instance created with defaults.
        self.neck_in_channels = (
            [256, 512, 1024] if neck_in_channels is None else list(neck_in_channels)
        )
        self.neck_out_channels = neck_out_channels
        self.neck_num_csp_blocks = neck_num_csp_blocks
        self.neck_expand_ratio = neck_expand_ratio

        # Detection head parameters.
        self.num_classes = num_classes
        self.head_in_channels = head_in_channels
        self.head_stacked_convs = head_stacked_convs
        self.head_feat_channels = head_feat_channels
        self.head_with_objectness = head_with_objectness
        self.head_exp_on_reg = head_exp_on_reg
        self.head_share_conv = head_share_conv
        self.head_pred_kernel_size = head_pred_kernel_size
        self.strides = [8, 16, 32] if strides is None else list(strides)

        # Input size and detection post-processing parameters.
        self.input_size = [640, 640] if input_size is None else list(input_size)
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.max_detections = max_detections