| from transformers import PretrainedConfig |
|
|
|
|
class TILAConfig(PretrainedConfig):
    """Configuration container for the ``"tila"`` model type.

    Every argument is stored unchanged as an instance attribute of the same
    name, except the two sequence-valued settings (``resnet_layers`` and
    ``vit_grid_shape``), which are normalised to tuples.  Remaining keyword
    arguments are forwarded to ``PretrainedConfig.__init__``.

    NOTE(review): the model code that consumes these values is not part of
    this file, so the descriptions below are inferred from the parameter
    names and defaults only -- confirm against the model implementation.

    Args:
        resnet_layers: Sequence of integers (four by default); presumably
            the number of blocks in each ResNet stage.
        backbone_out_dim: Presumably the channel width produced by the
            ResNet backbone.
        vit_dim: Presumably the embedding width of the ViT component.
        vit_heads: Presumably the number of attention heads per ViT block.
        vit_blocks: Presumably the number of ViT blocks.
        vit_grid_shape: Pair of integers; presumably the (rows, cols) token
            grid the ViT operates on.
        joint_feature_size: Presumably the size of the shared image/text
            feature space.
        text_model_name: Name of the text model (a Hugging Face Hub style
            identifier by default).
        text_hidden_size: Presumably the hidden width of that text model.
        classifier_hidden_dim: Presumably the hidden width of the classifier
            head.
        image_size: Presumably the input image side length in pixels.
        threshold_default: Named threshold value; presumably a decision
            threshold applied to the model score.
        threshold_bestf1: Named threshold value; presumably the threshold
            tuned for best F1.
        threshold_spec95: Named threshold value; presumably the threshold
            tuned for 95% specificity.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "tila"

    def __init__(
        self,
        # ResNet / ViT settings
        resnet_layers=(3, 4, 6, 3),
        backbone_out_dim=2048,
        vit_dim=256,
        vit_heads=8,
        vit_blocks=3,
        vit_grid_shape=(14, 14),
        joint_feature_size=128,
        # Text model settings
        text_model_name="microsoft/BiomedVLP-BioViL-T",
        text_hidden_size=768,
        # Classifier settings
        classifier_hidden_dim=64,
        # Input settings
        image_size=448,
        # Threshold values
        threshold_default=0.5000,
        threshold_bestf1=0.2886,
        threshold_spec95=0.6370,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Configs are persisted as JSON, where tuples are written as arrays
        # and read back as lists.  Normalising here keeps the attribute type
        # identical for a freshly constructed config and a reloaded one, so
        # comparisons against the defaults (or another config) stay stable.
        self.resnet_layers = self._as_tuple(resnet_layers)
        self.backbone_out_dim = backbone_out_dim
        self.vit_dim = vit_dim
        self.vit_heads = vit_heads
        self.vit_blocks = vit_blocks
        self.vit_grid_shape = self._as_tuple(vit_grid_shape)
        self.joint_feature_size = joint_feature_size

        self.text_model_name = text_model_name
        self.text_hidden_size = text_hidden_size

        self.classifier_hidden_dim = classifier_hidden_dim

        self.image_size = image_size

        self.threshold_default = threshold_default
        self.threshold_bestf1 = threshold_bestf1
        self.threshold_spec95 = threshold_spec95

    @staticmethod
    def _as_tuple(value):
        """Return *value* as a tuple, leaving ``None`` untouched."""
        # ``None`` is passed through so callers that explicitly supplied it
        # before this normalisation existed keep working.
        return None if value is None else tuple(value)
|
|