tipsv2-so400m14 / configuration_tips.py
gberton's picture
Upload configuration_tips.py with huggingface_hub
96352c1 verified
"""TIPSv2 model configuration."""
from transformers import PretrainedConfig
class TIPSv2Config(PretrainedConfig):
    """Hyperparameter container for the TIPSv2 vision-language model.

    Each named argument is stored unchanged on the instance under the same
    attribute name; this class performs no validation and derives no extra
    values. Any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__`` before the TIPSv2 fields are assigned.

    NOTE(review): the per-argument descriptions below are inferred from the
    parameter names and the section grouping in the signature; confirm the
    exact semantics against the modeling code that consumes this config.

    Args:
        vision_fn: Identifier of the vision encoder variant.
        embed_dim: Vision encoder embedding width.
        patch_size: Side length of an image patch.
        img_size: Input image side length.
        ffn_layer: Identifier of the feed-forward layer type.
        init_values: Initial value passed to the vision encoder
            (presumably a layer-scale init -- TODO confirm).
        num_register_tokens: Number of register tokens in the vision encoder.
        text_hidden_size: Text encoder hidden width.
        text_mlp_dim: Text encoder MLP width.
        text_num_heads: Number of attention heads in the text encoder.
        text_num_layers: Number of layers in the text encoder.
        vocab_size: Text vocabulary size.
        max_len: Maximum text length (presumably in tokens -- TODO confirm).
        temperature: Temperature used by the contrastive objective.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "tipsv2"

    def __init__(
        self,
        # Vision encoder
        vision_fn: str = "vit_base",
        embed_dim: int = 768,
        patch_size: int = 14,
        img_size: int = 448,
        ffn_layer: str = "mlp",
        init_values: float = 1.0,
        num_register_tokens: int = 1,
        # Text encoder
        text_hidden_size: int = 768,
        text_mlp_dim: int = 3072,
        text_num_heads: int = 12,
        text_num_layers: int = 12,
        vocab_size: int = 32000,
        max_len: int = 64,
        # Contrastive
        temperature: float = 0.01,
        **kwargs,
    ) -> None:
        # Base-class setup runs first, so the fields assigned below are
        # written after whatever the parent does with ``kwargs``.
        super().__init__(**kwargs)

        # Vision encoder settings.
        self.vision_fn = vision_fn
        self.embed_dim = embed_dim
        self.patch_size = patch_size
        self.img_size = img_size
        self.ffn_layer = ffn_layer
        self.init_values = init_values
        self.num_register_tokens = num_register_tokens

        # Text encoder settings.
        self.text_hidden_size = text_hidden_size
        self.text_mlp_dim = text_mlp_dim
        self.text_num_heads = text_num_heads
        self.text_num_layers = text_num_layers
        self.vocab_size = vocab_size
        self.max_len = max_len

        # Contrastive-objective settings.
        self.temperature = temperature