from typing import Dict, List, Optional, Union
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class RTMDetConfig(PretrainedConfig):
    """
    Configuration class for RTMDet models from OpenMMLab.

    Args:
        backbone_arch (`str`, *optional*, defaults to `"P5"`):
            Architecture of the backbone. Can be either "P5" or "P6".
        backbone_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio of the backbone channels.
        backbone_deepen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to deepen the backbone stages.
        backbone_widen_factor (`float`, *optional*, defaults to `1.0`):
            Factor to widen the backbone channels.
        backbone_channel_attention (`bool`, *optional*, defaults to `True`):
            Whether to use channel attention in the backbone.
        neck_in_channels (`List[int]`, *optional*, defaults to `[256, 512, 1024]`):
            Input channels for the neck.
        neck_out_channels (`int`, *optional*, defaults to `256`):
            Output channels for the neck.
        neck_num_csp_blocks (`int`, *optional*, defaults to `3`):
            Number of CSP blocks in the neck.
        neck_expand_ratio (`float`, *optional*, defaults to `0.5`):
            Expand ratio for the neck channels.
        num_classes (`int`, *optional*, defaults to `80`):
            Number of classes to predict.
        head_in_channels (`int`, *optional*, defaults to `256`):
            Input channels for the detection head.
        head_stacked_convs (`int`, *optional*, defaults to `2`):
            Number of stacked convolutions in the head.
        head_feat_channels (`int`, *optional*, defaults to `256`):
            Number of feature channels in the head.
        head_with_objectness (`bool`, *optional*, defaults to `False`):
            Whether to use objectness in the head.
        head_exp_on_reg (`bool`, *optional*, defaults to `True`):
            Whether to use exponential function on the regression branch.
        head_share_conv (`bool`, *optional*, defaults to `True`):
            Whether to share convolutions between classes in the head.
        head_pred_kernel_size (`int`, *optional*, defaults to `1`):
            Kernel size for the prediction layer in the head.
        strides (`List[int]`, *optional*, defaults to `[8, 16, 32]`):
            Strides for multi-scale feature maps.
        input_size (`List[int]`, *optional*, defaults to `[640, 640]`):
            Default input image size [width, height].
        score_threshold (`float`, *optional*, defaults to `0.05`):
            Score threshold for detections.
        nms_threshold (`float`, *optional*, defaults to `0.6`):
            NMS IoU threshold.
        max_detections (`int`, *optional*, defaults to `100`):
            Maximum number of detections to return.
        **kwargs:
            Additional parameters passed to the parent class.
    """

    model_type = "rtmdet"

    def __init__(
        self,
        backbone_arch: str = "P5",
        backbone_expand_ratio: float = 0.5,
        backbone_deepen_factor: float = 1.0,
        backbone_widen_factor: float = 1.0,
        backbone_channel_attention: bool = True,
        # List-valued defaults use a None sentinel instead of a mutable
        # default argument: a literal list default is a single shared object,
        # so mutating it on one config instance would leak into every
        # subsequently created config.
        neck_in_channels: Optional[List[int]] = None,
        neck_out_channels: int = 256,
        neck_num_csp_blocks: int = 3,
        neck_expand_ratio: float = 0.5,
        num_classes: int = 80,
        head_in_channels: int = 256,
        head_stacked_convs: int = 2,
        head_feat_channels: int = 256,
        head_with_objectness: bool = False,
        head_exp_on_reg: bool = True,
        head_share_conv: bool = True,
        head_pred_kernel_size: int = 1,
        strides: Optional[List[int]] = None,
        input_size: Optional[List[int]] = None,
        score_threshold: float = 0.05,
        nms_threshold: float = 0.6,
        max_detections: int = 100,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Backbone config
        self.backbone_arch = backbone_arch
        self.backbone_expand_ratio = backbone_expand_ratio
        self.backbone_deepen_factor = backbone_deepen_factor
        self.backbone_widen_factor = backbone_widen_factor
        self.backbone_channel_attention = backbone_channel_attention
        # Neck config. list(...) copies caller-supplied sequences so later
        # in-place edits on the config never alias the caller's list.
        self.neck_in_channels = list(neck_in_channels) if neck_in_channels is not None else [256, 512, 1024]
        self.neck_out_channels = neck_out_channels
        self.neck_num_csp_blocks = neck_num_csp_blocks
        self.neck_expand_ratio = neck_expand_ratio
        # Head config
        self.num_classes = num_classes
        self.head_in_channels = head_in_channels
        self.head_stacked_convs = head_stacked_convs
        self.head_feat_channels = head_feat_channels
        self.head_with_objectness = head_with_objectness
        self.head_exp_on_reg = head_exp_on_reg
        self.head_share_conv = head_share_conv
        self.head_pred_kernel_size = head_pred_kernel_size
        self.strides = list(strides) if strides is not None else [8, 16, 32]
        # Inference config
        self.input_size = list(input_size) if input_size is not None else [640, 640]
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.max_detections = max_detections