from typing import Dict, List, Optional, Union

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)


class RTMDetConfig(PretrainedConfig):
    """
    Configuration class for RTMDet models from OpenMMLab.

    Args:
        backbone_arch (`str`, *optional*, defaults to `"P5"`):
            Architecture of the backbone. Can be either "P5" or "P6".
        backbone_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio of the backbone channels.
        backbone_deepen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to deepen the backbone stages.
        backbone_widen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to widen the backbone channels.
        backbone_channel_attention (`bool`, *optional*, defaults to `True`):
            Whether to use channel attention in the backbone.
        neck_in_channels (`List[int]`, *optional*, defaults to `[256, 512, 1024]`):
            Input channels for the neck.
        neck_out_channels (`int`, *optional*, defaults to `256`):
            Output channels for the neck.
        neck_num_csp_blocks (`int`, *optional*, defaults to `3`):
            Number of CSP blocks in the neck.
        neck_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio for the neck channels.
        num_classes (`int`, *optional*, defaults to `80`):
            Number of classes to predict.
        head_in_channels (`int`, *optional*, defaults to `256`):
            Input channels for the detection head.
        head_stacked_convs (`int`, *optional*, defaults to `2`):
            Number of stacked convolutions in the head.
        head_feat_channels (`int`, *optional*, defaults to `256`):
            Number of feature channels in the head.
        head_with_objectness (`bool`, *optional*, defaults to `False`):
            Whether to use objectness in the head.
        head_exp_on_reg (`bool`, *optional*, defaults to `True`):
            Whether to use exponential function on the regression branch.
        head_share_conv (`bool`, *optional*, defaults to `True`):
            Whether to share convolutions between classes in the head.
        head_pred_kernel_size (`int`, *optional*, defaults to `1`):
            Kernel size for the prediction layer in the head.
        strides (`List[int]`, *optional*, defaults to `[8, 16, 32]`):
            Strides for multi-scale feature maps.
        input_size (`List[int]`, *optional*, defaults to `[640, 640]`):
            Default input image size [width, height].
        score_threshold (`float`, *optional*, defaults to `0.05`):
            Score threshold for detections.
        nms_threshold (`float`, *optional*, defaults to `0.6`):
            NMS IoU threshold.
        max_detections (`int`, *optional*, defaults to `100`):
            Maximum number of detections to return.
        **kwargs:
            Additional parameters passed to the parent class.
    """

    model_type = "rtmdet"

    def __init__(
        self,
        backbone_arch: str = "P5",
        backbone_expand_ratio: float = 0.5,
        backbone_deepen_factor: float = 1.0,
        backbone_widen_factor: float = 1.0,
        backbone_channel_attention: bool = True,
        # NOTE: list-valued parameters use None sentinels instead of mutable
        # defaults so each config instance gets its own fresh list — a shared
        # default list would be aliased across every instance (and the
        # signature itself), so mutating one config would corrupt the others.
        neck_in_channels: Optional[List[int]] = None,
        neck_out_channels: int = 256,
        neck_num_csp_blocks: int = 3,
        neck_expand_ratio: float = 0.5,
        num_classes: int = 80,
        head_in_channels: int = 256,
        head_stacked_convs: int = 2,
        head_feat_channels: int = 256,
        head_with_objectness: bool = False,
        head_exp_on_reg: bool = True,
        head_share_conv: bool = True,
        head_pred_kernel_size: int = 1,
        strides: Optional[List[int]] = None,
        input_size: Optional[List[int]] = None,
        score_threshold: float = 0.05,
        nms_threshold: float = 0.6,
        max_detections: int = 100,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Backbone config
        self.backbone_arch = backbone_arch
        self.backbone_expand_ratio = backbone_expand_ratio
        self.backbone_deepen_factor = backbone_deepen_factor
        self.backbone_widen_factor = backbone_widen_factor
        self.backbone_channel_attention = backbone_channel_attention

        # Neck config (fresh list per instance when the default is used)
        self.neck_in_channels = neck_in_channels if neck_in_channels is not None else [256, 512, 1024]
        self.neck_out_channels = neck_out_channels
        self.neck_num_csp_blocks = neck_num_csp_blocks
        self.neck_expand_ratio = neck_expand_ratio

        # Head config
        self.num_classes = num_classes
        self.head_in_channels = head_in_channels
        self.head_stacked_convs = head_stacked_convs
        self.head_feat_channels = head_feat_channels
        self.head_with_objectness = head_with_objectness
        self.head_exp_on_reg = head_exp_on_reg
        self.head_share_conv = head_share_conv
        self.head_pred_kernel_size = head_pred_kernel_size
        self.strides = strides if strides is not None else [8, 16, 32]

        # Inference config
        self.input_size = input_size if input_size is not None else [640, 640]
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.max_detections = max_detections