| from transformers import PretrainedConfig | |
class FalconPerceptionConfig(PretrainedConfig):
    """Configuration object for the ``falcon_perception`` model type.

    Each named constructor argument is stored, unmodified, as an instance
    attribute with the same name. Any additional keyword arguments are passed
    straight through to ``PretrainedConfig.__init__``, which is invoked after
    all of the attributes below have been set.

    NOTE(review): this file does not show how the values are consumed, so the
    grouping below is inferred from the parameter names only -- confirm against
    the modeling code before relying on it.

    Args:
        dim, n_layers, n_heads, head_dim, n_kv_heads, vocab_size, ffn_dim,
        norm_eps, max_seq_len, rope_theta:
            Presumably the core transformer hyperparameters.
        channel_size, spatial_patch_size, temporal_patch_size:
            Presumably describe how visual input is split into patches.
        do_segmentation, segm_out_dim, num_segm_layers:
            Presumably control an optional segmentation head.
        coord_enc_dim, coord_dec_dim, coord_out_dim, coord_token_id:
            Presumably sizes and the token id for coordinate prediction.
        size_enc_dim, size_dec_dim, size_out_dim, size_token_id:
            Presumably sizes and the token id for size prediction.
        seg_token_id, eos_id, img_id, image_cls_token_id,
        image_reg_1_token_id .. image_reg_4_token_id, img_end_id:
            Integer ids, presumably of special tokens in the vocabulary.
        **kwargs:
            Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "falcon_perception"

    def __init__(
        self,
        dim: int = 1024,
        n_layers: int = 28,
        n_heads: int = 16,
        head_dim: int = 128,
        n_kv_heads: int = 8,
        vocab_size: int = 65536,
        ffn_dim: int = 3072,
        norm_eps: float = 1e-5,
        max_seq_len: int = 8192,
        rope_theta: int = 10000,
        channel_size: int = 3,
        spatial_patch_size: int = 16,
        temporal_patch_size: int = 1,
        do_segmentation: bool = True,
        segm_out_dim: int = 256,
        num_segm_layers: int = 3,
        coord_enc_dim: int = 512,
        coord_dec_dim: int = 8192,
        coord_out_dim: int = 2048,
        coord_token_id: int = 240,
        size_enc_dim: int = 512,
        size_dec_dim: int = 8192,
        size_out_dim: int = 2048,
        size_token_id: int = 241,
        seg_token_id: int = 262,
        eos_id: int = 11,
        img_id: int = 227,
        image_cls_token_id: int = 244,
        image_reg_1_token_id: int = 245,
        image_reg_2_token_id: int = 246,
        image_reg_3_token_id: int = 247,
        image_reg_4_token_id: int = 248,
        img_end_id: int = 230,
        **kwargs,
    ):
        # Listed in signature order; dicts preserve insertion order, so the
        # attributes are assigned in the same sequence as the parameters.
        own_settings = {
            "dim": dim,
            "n_layers": n_layers,
            "n_heads": n_heads,
            "head_dim": head_dim,
            "n_kv_heads": n_kv_heads,
            "vocab_size": vocab_size,
            "ffn_dim": ffn_dim,
            "norm_eps": norm_eps,
            "max_seq_len": max_seq_len,
            "rope_theta": rope_theta,
            "channel_size": channel_size,
            "spatial_patch_size": spatial_patch_size,
            "temporal_patch_size": temporal_patch_size,
            "do_segmentation": do_segmentation,
            "segm_out_dim": segm_out_dim,
            "num_segm_layers": num_segm_layers,
            "coord_enc_dim": coord_enc_dim,
            "coord_dec_dim": coord_dec_dim,
            "coord_out_dim": coord_out_dim,
            "coord_token_id": coord_token_id,
            "size_enc_dim": size_enc_dim,
            "size_dec_dim": size_dec_dim,
            "size_out_dim": size_out_dim,
            "size_token_id": size_token_id,
            "seg_token_id": seg_token_id,
            "eos_id": eos_id,
            "img_id": img_id,
            "image_cls_token_id": image_cls_token_id,
            "image_reg_1_token_id": image_reg_1_token_id,
            "image_reg_2_token_id": image_reg_2_token_id,
            "image_reg_3_token_id": image_reg_3_token_id,
            "image_reg_4_token_id": image_reg_4_token_id,
            "img_end_id": img_end_id,
        }
        for attr_name, attr_value in own_settings.items():
            setattr(self, attr_name, attr_value)

        # The base initializer runs last, after every model-specific
        # attribute is in place; it receives only the extra keyword arguments.
        super().__init__(**kwargs)