| from transformers import PretrainedConfig | |
class CSDConfig(PretrainedConfig):
    """Configuration object carrying six ``vit_*`` hyperparameters.

    Each named argument is stored unchanged as an instance attribute of the
    same name. Any additional keyword arguments are handed to the
    ``PretrainedConfig`` initializer.

    NOTE(review): the parameter meanings below are inferred from the
    argument names only -- confirm against the code that reads this config.

    Args:
        vit_input_resolution: Presumably the input image side length.
            Defaults to 224.
        vit_patch_size: Presumably the patch side length. Defaults to 14.
        vit_width: Presumably the transformer hidden width. Defaults to 1024.
        vit_layers: Presumably the number of transformer layers.
            Defaults to 24.
        vit_heads: Presumably the number of attention heads. Defaults to 16.
        vit_output_dim: Presumably the size of the output embedding.
            Defaults to 768.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    def __init__(
        self,
        vit_input_resolution: int = 224,
        vit_patch_size: int = 14,
        vit_width: int = 1024,
        vit_layers: int = 24,
        vit_heads: int = 16,
        vit_output_dim: int = 768,
        **kwargs
    ) -> None:
        # The base initializer runs first; the explicit values below are
        # assigned afterwards.
        super().__init__(**kwargs)

        # Input geometry.
        self.vit_input_resolution = vit_input_resolution
        self.vit_patch_size = vit_patch_size

        # Transformer dimensions.
        self.vit_width = vit_width
        self.vit_layers = vit_layers
        self.vit_heads = vit_heads
        self.vit_output_dim = vit_output_dim