"""TIPSv2 model configuration.""" from transformers import PretrainedConfig class TIPSv2ImageConfig(PretrainedConfig): """Configuration for TIPSv2 vision-language model.""" model_type = "tipsv2" def __init__( self, model_variant="base", hidden_size=768, patch_size=14, image_size=448, ffn_layer="mlp", init_values=1.0, num_register_tokens=1, **kwargs, ): super().__init__(**kwargs) self.model_variant = model_variant self.hidden_size = hidden_size self.patch_size = patch_size self.image_size = image_size self.ffn_layer = ffn_layer self.init_values = init_values self.num_register_tokens = num_register_tokens