rtmdet-m / configuration_rtmdet.py

Add rtmdet-m RTMW/RTMDet HF port

5bd0bd8 verified 11 days ago

5.29 kB

	from typing import Dict, List, Optional, Union

	from transformers.configuration_utils import PretrainedConfig
	from transformers.utils import logging


	# Module-level logger named after this module, obtained from the
	# `transformers.utils.logging` helper imported above.
	logger = logging.get_logger(__name__)


	class RTMDetConfig(PretrainedConfig):
	    """
	    Configuration class for RTMDet models from OpenMMLab.

	    Every argument is stored unchanged as an attribute of the same name,
	    except the list-valued arguments (`neck_in_channels`, `strides`,
	    `input_size`), which are copied into a fresh list per instance so that
	    configurations never share mutable state with each other or with the
	    caller.

	    Args:
	        backbone_arch (`str`, optional, defaults to `"P5"`):
	            Architecture of the backbone. Can be either "P5" or "P6".
	        backbone_expand_ratio (`float`, optional, defaults to `0.5`):
	            Expand ratio of the backbone channels.
	        backbone_deepen_factor (`float`, optional, defaults to `1.0`):
	            Factor to deepen the backbone stages.
	        backbone_widen_factor (`float`, optional, defaults to `1.0`):
	            Factor to widen the backbone channels.
	        backbone_channel_attention (`bool`, optional, defaults to `True`):
	            Whether to use channel attention in the backbone.
	        neck_in_channels (`List[int]`, optional, defaults to `[256, 512, 1024]`):
	            Input channels for the neck. `None` selects the default.
	        neck_out_channels (`int`, optional, defaults to `256`):
	            Output channels for the neck.
	        neck_num_csp_blocks (`int`, optional, defaults to `3`):
	            Number of CSP blocks in the neck.
	        neck_expand_ratio (`float`, optional, defaults to `0.5`):
	            Expand ratio for the neck channels.
	        num_classes (`int`, optional, defaults to `80`):
	            Number of classes to predict.
	        head_in_channels (`int`, optional, defaults to `256`):
	            Input channels for the detection head.
	        head_stacked_convs (`int`, optional, defaults to `2`):
	            Number of stacked convolutions in the head.
	        head_feat_channels (`int`, optional, defaults to `256`):
	            Number of feature channels in the head.
	        head_with_objectness (`bool`, optional, defaults to `False`):
	            Whether to use objectness in the head.
	        head_exp_on_reg (`bool`, optional, defaults to `True`):
	            Whether to use exponential function on the regression branch.
	        head_share_conv (`bool`, optional, defaults to `True`):
	            Whether to share convolutions between classes in the head.
	        head_pred_kernel_size (`int`, optional, defaults to `1`):
	            Kernel size for the prediction layer in the head.
	        strides (`List[int]`, optional, defaults to `[8, 16, 32]`):
	            Strides for multi-scale feature maps. `None` selects the default.
	        input_size (`List[int]`, optional, defaults to `[640, 640]`):
	            Default input image size [width, height]. `None` selects the
	            default.
	        score_threshold (`float`, optional, defaults to `0.05`):
	            Score threshold for detections.
	        nms_threshold (`float`, optional, defaults to `0.6`):
	            NMS IoU threshold.
	        max_detections (`int`, optional, defaults to `100`):
	            Maximum number of detections to return.
	        **kwargs:
	            Additional parameters passed to the parent class.
	    """

	    model_type = "rtmdet"

	    def __init__(
	        self,
	        backbone_arch: str = "P5",
	        backbone_expand_ratio: float = 0.5,
	        backbone_deepen_factor: float = 1.0,
	        backbone_widen_factor: float = 1.0,
	        backbone_channel_attention: bool = True,
	        neck_in_channels: Optional[List[int]] = None,
	        neck_out_channels: int = 256,
	        neck_num_csp_blocks: int = 3,
	        neck_expand_ratio: float = 0.5,
	        num_classes: int = 80,
	        head_in_channels: int = 256,
	        head_stacked_convs: int = 2,
	        head_feat_channels: int = 256,
	        head_with_objectness: bool = False,
	        head_exp_on_reg: bool = True,
	        head_share_conv: bool = True,
	        head_pred_kernel_size: int = 1,
	        strides: Optional[List[int]] = None,
	        input_size: Optional[List[int]] = None,
	        score_threshold: float = 0.05,
	        nms_threshold: float = 0.6,
	        max_detections: int = 100,
	        **kwargs,
	    ):
	        super().__init__(**kwargs)

	        # Backbone config
	        self.backbone_arch = backbone_arch
	        self.backbone_expand_ratio = backbone_expand_ratio
	        self.backbone_deepen_factor = backbone_deepen_factor
	        self.backbone_widen_factor = backbone_widen_factor
	        self.backbone_channel_attention = backbone_channel_attention

	        # Neck config
	        # List defaults are built per call (never as literal defaults in the
	        # signature) so that mutating one config's list cannot leak into
	        # other instances.
	        self.neck_in_channels = (
	            [256, 512, 1024] if neck_in_channels is None else list(neck_in_channels)
	        )
	        self.neck_out_channels = neck_out_channels
	        self.neck_num_csp_blocks = neck_num_csp_blocks
	        self.neck_expand_ratio = neck_expand_ratio

	        # Head config
	        self.num_classes = num_classes
	        self.head_in_channels = head_in_channels
	        self.head_stacked_convs = head_stacked_convs
	        self.head_feat_channels = head_feat_channels
	        self.head_with_objectness = head_with_objectness
	        self.head_exp_on_reg = head_exp_on_reg
	        self.head_share_conv = head_share_conv
	        self.head_pred_kernel_size = head_pred_kernel_size
	        self.strides = [8, 16, 32] if strides is None else list(strides)

	        # Inference config
	        self.input_size = [640, 640] if input_size is None else list(input_size)
	        self.score_threshold = score_threshold
	        self.nms_threshold = nms_threshold
	        self.max_detections = max_detections