File size: 824 Bytes
ff7fe7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
from transformers import PretrainedConfig
class MultimodalConfig(PretrainedConfig):
    """Configuration container for the ``multimodal_embedder`` model type.

    The constructor only records the values it is given as instance
    attributes; how they are consumed is decided by the model code, which
    is not part of this file.

    Args:
        text_model_name: Identifier of the text backbone
            (stored as ``self.text_model_name``).
        vision_model_name: Identifier of the vision backbone
            (stored as ``self.vision_model_name``).
        text_dim: Stored as ``self.text_dim``. Presumably the hidden size
            of the text backbone -- confirm against the model code.
        image_dim: Stored as ``self.image_dim``. Presumably the hidden size
            of the vision backbone -- confirm against the model code.
        embed_dim: Stored as ``self.embed_dim``. Presumably the size of the
            shared embedding space -- confirm against the model code.
        temperature_init: Stored as ``self.temperature_init``; defaults to
            ``1/0.07``. NOTE(review): this looks like an inverse-temperature
            (logit scale) starting value rather than a temperature --
            confirm against the model code.
        use_mean_pooling_for_text: Boolean flag stored as
            ``self.use_mean_pooling_for_text``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "multimodal_embedder"

    def __init__(
        self,
        text_model_name="newmindai/modernbert-base-tr-uncased-allnli-stsb",
        vision_model_name="facebook/dinov2-base",
        text_dim=768,
        image_dim=768,
        embed_dim=384,
        temperature_init=1/0.07,
        use_mean_pooling_for_text=True,
        **kwargs
    ):
        # Let the base class consume the generic options before the
        # model-specific fields are attached.
        super().__init__(**kwargs)

        # Backbone identifiers.
        self.text_model_name, self.vision_model_name = (
            text_model_name,
            vision_model_name,
        )

        # Dimensionalities.
        self.text_dim, self.image_dim, self.embed_dim = (
            text_dim,
            image_dim,
            embed_dim,
        )

        # Remaining hyperparameters.
        self.temperature_init, self.use_mean_pooling_for_text = (
            temperature_init,
            use_mean_pooling_for_text,
        )
|